Loading checker_test_file.txt +1 −1 Original line number Diff line number Diff line This is unambiguous. This is word odd so choice. Sorry for the inconvenience. Kenya, officially the Republic of Kenya No newline at end of file Kenya, officially the Republic of Kenya. No newline at end of file pom.xml +6 −0 Original line number Diff line number Diff line Loading @@ -20,6 +20,12 @@ <artifactId>jsoup</artifactId> <version>1.15.3</version> </dependency> <dependency> <groupId>de.sciss</groupId> <artifactId>dotterweide-ui_2.12</artifactId> <version>0.4.3</version> </dependency> </dependencies> </project> No newline at end of file src/main/java/edu/bu/LanguageCorrection/Checker.java +7 −4 Original line number Diff line number Diff line package edu.bu.LanguageCorrection; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Paths; import java.util.List; import java.util.Map; import java.util.HashMap; import java.util.ArrayList; import java.util.zip.Inflater; import java.io.ByteArrayOutputStream; import java.io.FileInputStream; public class Checker { public void analyze(String text) { List<String> sentences = TextProcessor.extractSentences(text); Loading @@ -21,6 +20,7 @@ public class Checker { Map<String, Float> phraseScores = new HashMap<>(); for (String sentence : sentences) { sentence = sentence.replaceAll("[^a-zA-Z0-9\\s]", ""); // System.out.println("Analyzing sentence: " + sentence); List<String> phrases = TextProcessor.extractPhrases(sentence, 2, 3); Loading @@ -28,7 +28,7 @@ public class Checker { for (String phrase : phrases) { // System.out.println("Analyzing phrase: " + phrase); float perplexity = detector.perplexity(phrase); if (perplexity > 100) { if (perplexity < 0) { phraseScores.put(phrase, 100f); } else { phraseScores.put(phrase, perplexity); Loading @@ -50,6 +50,7 @@ public class Checker { System.out.println("\"phrases\": " + mapToJson(phraseScores)); System.out.println("}"); } private static String mapToJson(Map<String, Float> map) { StringBuilder jsonBuilder = new StringBuilder("{"); for (Map.Entry<String, Float> entry : map.entrySet()) { Loading @@ -60,6 +61,7 @@ public class Checker { return jsonBuilder.toString(); } private static byte[] decompress(byte[] compressedData) { Inflater decompressor = new Inflater(); decompressor.setInput(compressedData); Loading Loading @@ -93,6 +95,7 @@ public class Checker { return new TrieNode(); } } public static void main(String[] args) { if (args.length > 1 && "--file".equals(args[0])) { // check syntax String path = args[1]; Loading src/main/java/edu/bu/LanguageCorrection/MainApp.java +84 −39 Original line number Diff line number Diff line package edu.bu.LanguageCorrection; import javax.sql.rowset.spi.SyncFactory; import javax.swing.*; import javax.swing.text.Highlighter; import javax.swing.text.Highlighter.Highlight; import dotterweide.editor.painter.HighlightPainter; import java.awt.*; // import java.awt.event.ActionEvent; import java.io.ByteArrayOutputStream; Loading @@ -11,6 +14,12 @@ import java.io.PrintStream; import java.nio.file.Files; import java.nio.file.Paths; import java.util.List; import java.util.Map; import javax.swing.*; import javax.swing.text.*; import java.awt.Color; import java.awt.BorderLayout; import java.awt.event.ActionEvent; public class MainApp extends JFrame { private final JTextField urlField; Loading Loading @@ -116,16 +125,25 @@ public class MainApp extends JFrame { String content; if (isFile) { content = new String(Files.readAllBytes(Paths.get(text))); } else } else { content = text; } // Assume content is already properly split into sentences here List<String> sentences = TextProcessor.extractSentences(content); // Use a method to split into sentences Checker checker = new Checker(); checker.analyze(content); // This outputs void so we need to take the output from the console StringBuilder result = new StringBuilder(); ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); PrintStream printStream = new PrintStream(outputStream); PrintStream originalOut = System.out; String[] worstPhrases = new String[sentences.size()]; for (String sentence : sentences) { System.setOut(printStream); checker.analyze(sentence); // Analyze each sentence separately // Reset System.out System.out.flush(); Loading @@ -133,41 +151,68 @@ public class MainApp extends JFrame { // Capture the output into a string String output = outputStream.toString(); outputStream.reset(); // Clear the output stream for the next sentence //System.out.println(output); // Parsing the output to get sentences and phrases // Parsing the output to get phrases and their scores String[] lines = output.split("\n"); String[] sentences = null; double lowestScore = Double.MAX_VALUE; String worstPhrase = null; for (String line : lines) { if (line.startsWith("{")) { continue; } else if (line.startsWith("sentences:")) { line = line.replace("sentences:", "").replaceAll("\"{", "").replaceAll("}", ""); sentences = line.split(","); // List of sentences } else if (line.startsWith("phrases:")) { line = line.replace("phrases:", "").replaceAll("\"{", "").replaceAll("}", ""); phrases = line.split(","); // List of phrases if (line.startsWith("\"phrases\":")) { //System.out.println(line); line = line.replace("\"phrases\":", "").replace("{", "").replace("}", "").trim(); //System.out.println(line); String[] phrases = line.split(","); //System.out.println(line); for (String phrase : phrases) { double phraseScore = phrases; // Assuming highlight() method is defined elsewhere highlight(resultArea, phrase, phraseScore); // Pass the phrase and its score to highlight String[] parts = phrase.trim().split(":"); double phraseScore = parts[1].trim().equals("null") ? 0 : Double.parseDouble(parts[1].trim()); if (phraseScore < lowestScore) { lowestScore = phraseScore; worstPhrase = parts[0].trim(); worstPhrases[sentences.indexOf(sentence)] = worstPhrase; } } } } catch (Exception e) { resultArea.setText("Error: " + e.getMessage()); } if (worstPhrase != null) { // Append to the result with annotations result.append("\nSentence: ").append(sentence) .append("\n>> Worst Phrase: ").append(worstPhrase) .append(" (Score: ").append(lowestScore).append(")\n"); } } public void highlight(JTextArea textArea, String phrase, String pattern) { resultArea.setText(result.toString()); // Display the annotated results in the JTextArea // Highlight the worst phrase in each sentence Highlighter highlighter = resultArea.getHighlighter(); Highlighter.HighlightPainter painter = new DefaultHighlighter.DefaultHighlightPainter(Color.YELLOW); for (String phrase : worstPhrases) { phrase = phrase.replaceAll("\"", ""); //System.out.println(phrase); if (phrase != null) { //System.out.println(resultArea.getText()); int start = resultArea.getText().indexOf(phrase); if (start != -1) { int end = start + phrase.length(); try { Highlighter hilite = textArea.getHighlighter(); int pos = 0; while ((pos = phrase.indexOf(pattern, pos)) >= 0) { // Create highlighter using private painter and apply around pattern hilite.addHighlight(pos, pos + pattern.length(), myHighlightPainter); pos += pattern.length(); highlighter.addHighlight(start, end, painter); } catch (BadLocationException e) { e.printStackTrace(); } } else { System.out.println("Text not found"); } } } } catch (Exception e) { //throw new RuntimeException(e); resultArea.setText("Error: " + e.getMessage()); } } Loading src/main/java/edu/bu/LanguageCorrection/crawler.java +3 −0 Original line number Diff line number Diff line Loading @@ -136,6 +136,9 @@ public class crawler { System.err.println("Unsupported language: " + language); return; } build_off_corpus = true; processPage(get_file_text(corpus)); } Loading Loading
checker_test_file.txt +1 −1 Original line number Diff line number Diff line This is unambiguous. This is word odd so choice. Sorry for the inconvenience. Kenya, officially the Republic of Kenya No newline at end of file Kenya, officially the Republic of Kenya. No newline at end of file
pom.xml +6 −0 Original line number Diff line number Diff line Loading @@ -20,6 +20,12 @@ <artifactId>jsoup</artifactId> <version>1.15.3</version> </dependency> <dependency> <groupId>de.sciss</groupId> <artifactId>dotterweide-ui_2.12</artifactId> <version>0.4.3</version> </dependency> </dependencies> </project> No newline at end of file
src/main/java/edu/bu/LanguageCorrection/Checker.java +7 −4 Original line number Diff line number Diff line package edu.bu.LanguageCorrection; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Paths; import java.util.List; import java.util.Map; import java.util.HashMap; import java.util.ArrayList; import java.util.zip.Inflater; import java.io.ByteArrayOutputStream; import java.io.FileInputStream; public class Checker { public void analyze(String text) { List<String> sentences = TextProcessor.extractSentences(text); Loading @@ -21,6 +20,7 @@ public class Checker { Map<String, Float> phraseScores = new HashMap<>(); for (String sentence : sentences) { sentence = sentence.replaceAll("[^a-zA-Z0-9\\s]", ""); // System.out.println("Analyzing sentence: " + sentence); List<String> phrases = TextProcessor.extractPhrases(sentence, 2, 3); Loading @@ -28,7 +28,7 @@ public class Checker { for (String phrase : phrases) { // System.out.println("Analyzing phrase: " + phrase); float perplexity = detector.perplexity(phrase); if (perplexity > 100) { if (perplexity < 0) { phraseScores.put(phrase, 100f); } else { phraseScores.put(phrase, perplexity); Loading @@ -50,6 +50,7 @@ public class Checker { System.out.println("\"phrases\": " + mapToJson(phraseScores)); System.out.println("}"); } private static String mapToJson(Map<String, Float> map) { StringBuilder jsonBuilder = new StringBuilder("{"); for (Map.Entry<String, Float> entry : map.entrySet()) { Loading @@ -60,6 +61,7 @@ public class Checker { return jsonBuilder.toString(); } private static byte[] decompress(byte[] compressedData) { Inflater decompressor = new Inflater(); decompressor.setInput(compressedData); Loading Loading @@ -93,6 +95,7 @@ public class Checker { return new TrieNode(); } } public static void main(String[] args) { if (args.length > 1 && "--file".equals(args[0])) { // check syntax String path = args[1]; Loading
src/main/java/edu/bu/LanguageCorrection/MainApp.java +84 −39 Original line number Diff line number Diff line package edu.bu.LanguageCorrection; import javax.sql.rowset.spi.SyncFactory; import javax.swing.*; import javax.swing.text.Highlighter; import javax.swing.text.Highlighter.Highlight; import dotterweide.editor.painter.HighlightPainter; import java.awt.*; // import java.awt.event.ActionEvent; import java.io.ByteArrayOutputStream; Loading @@ -11,6 +14,12 @@ import java.io.PrintStream; import java.nio.file.Files; import java.nio.file.Paths; import java.util.List; import java.util.Map; import javax.swing.*; import javax.swing.text.*; import java.awt.Color; import java.awt.BorderLayout; import java.awt.event.ActionEvent; public class MainApp extends JFrame { private final JTextField urlField; Loading Loading @@ -116,16 +125,25 @@ public class MainApp extends JFrame { String content; if (isFile) { content = new String(Files.readAllBytes(Paths.get(text))); } else } else { content = text; } // Assume content is already properly split into sentences here List<String> sentences = TextProcessor.extractSentences(content); // Use a method to split into sentences Checker checker = new Checker(); checker.analyze(content); // This outputs void so we need to take the output from the console StringBuilder result = new StringBuilder(); ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); PrintStream printStream = new PrintStream(outputStream); PrintStream originalOut = System.out; String[] worstPhrases = new String[sentences.size()]; for (String sentence : sentences) { System.setOut(printStream); checker.analyze(sentence); // Analyze each sentence separately // Reset System.out System.out.flush(); Loading @@ -133,41 +151,68 @@ public class MainApp extends JFrame { // Capture the output into a string String output = outputStream.toString(); outputStream.reset(); // Clear the output stream for the next sentence //System.out.println(output); // Parsing the output to get sentences and phrases // Parsing the output to get phrases and their scores String[] lines = output.split("\n"); String[] sentences = null; double lowestScore = Double.MAX_VALUE; String worstPhrase = null; for (String line : lines) { if (line.startsWith("{")) { continue; } else if (line.startsWith("sentences:")) { line = line.replace("sentences:", "").replaceAll("\"{", "").replaceAll("}", ""); sentences = line.split(","); // List of sentences } else if (line.startsWith("phrases:")) { line = line.replace("phrases:", "").replaceAll("\"{", "").replaceAll("}", ""); phrases = line.split(","); // List of phrases if (line.startsWith("\"phrases\":")) { //System.out.println(line); line = line.replace("\"phrases\":", "").replace("{", "").replace("}", "").trim(); //System.out.println(line); String[] phrases = line.split(","); //System.out.println(line); for (String phrase : phrases) { double phraseScore = phrases; // Assuming highlight() method is defined elsewhere highlight(resultArea, phrase, phraseScore); // Pass the phrase and its score to highlight String[] parts = phrase.trim().split(":"); double phraseScore = parts[1].trim().equals("null") ? 0 : Double.parseDouble(parts[1].trim()); if (phraseScore < lowestScore) { lowestScore = phraseScore; worstPhrase = parts[0].trim(); worstPhrases[sentences.indexOf(sentence)] = worstPhrase; } } } } catch (Exception e) { resultArea.setText("Error: " + e.getMessage()); } if (worstPhrase != null) { // Append to the result with annotations result.append("\nSentence: ").append(sentence) .append("\n>> Worst Phrase: ").append(worstPhrase) .append(" (Score: ").append(lowestScore).append(")\n"); } } public void highlight(JTextArea textArea, String phrase, String pattern) { resultArea.setText(result.toString()); // Display the annotated results in the JTextArea // Highlight the worst phrase in each sentence Highlighter highlighter = resultArea.getHighlighter(); Highlighter.HighlightPainter painter = new DefaultHighlighter.DefaultHighlightPainter(Color.YELLOW); for (String phrase : worstPhrases) { phrase = phrase.replaceAll("\"", ""); //System.out.println(phrase); if (phrase != null) { //System.out.println(resultArea.getText()); int start = resultArea.getText().indexOf(phrase); if (start != -1) { int end = start + phrase.length(); try { Highlighter hilite = textArea.getHighlighter(); int pos = 0; while ((pos = phrase.indexOf(pattern, pos)) >= 0) { // Create highlighter using private painter and apply around pattern hilite.addHighlight(pos, pos + pattern.length(), myHighlightPainter); pos += pattern.length(); highlighter.addHighlight(start, end, painter); } catch (BadLocationException e) { e.printStackTrace(); } } else { System.out.println("Text not found"); } } } } catch (Exception e) { //throw new RuntimeException(e); resultArea.setText("Error: " + e.getMessage()); } } Loading
src/main/java/edu/bu/LanguageCorrection/crawler.java +3 −0 Original line number Diff line number Diff line Loading @@ -136,6 +136,9 @@ public class crawler { System.err.println("Unsupported language: " + language); return; } build_off_corpus = true; processPage(get_file_text(corpus)); } Loading