add highlighting to runchecker (a24227ab) · Commits · EC504 Spring 2024 Group Projects / Group7

checker_test_file.txt

+1 −1

Original line number	Diff line number	Diff line
		This is unambiguous. This is word odd so choice. Sorry for the inconvenience.
		Kenya, officially the Republic of Kenya
		No newline at end of file
		Kenya, officially the Republic of Kenya.
		No newline at end of file

pom.xml

+6 −0

Original line number	Diff line number	Diff line
		@@ -20,6 +20,12 @@
		<artifactId>jsoup</artifactId>
		<version>1.15.3</version>
		</dependency>

		<dependency>
		<groupId>de.sciss</groupId>
		<artifactId>dotterweide-ui_2.12</artifactId>
		<version>0.4.3</version>
		</dependency>
		</dependencies>

		</project>
		No newline at end of file

src/main/java/edu/bu/LanguageCorrection/Checker.java

+7 −4

Original line number	Diff line number	Diff line
		package edu.bu.LanguageCorrection;

		import java.io.IOException;
		import java.nio.file.Files;
		import java.nio.file.Paths;
		import java.util.List;
		import java.util.Map;
		import java.util.HashMap;
		import java.util.ArrayList;
		import java.util.zip.Inflater;
		import java.io.ByteArrayOutputStream;
		import java.io.FileInputStream;


		public class Checker {
		public void analyze(String text) {
		List<String> sentences = TextProcessor.extractSentences(text);
		@@ -21,6 +20,7 @@ public class Checker {
		Map<String, Float> phraseScores = new HashMap<>();

		for (String sentence : sentences) {
		sentence = sentence.replaceAll("[^a-zA-Z0-9\\s]", "");
		// System.out.println("Analyzing sentence: " + sentence);
		List<String> phrases = TextProcessor.extractPhrases(sentence, 2, 3);

		@@ -28,7 +28,7 @@ public class Checker {
		for (String phrase : phrases) {
		// System.out.println("Analyzing phrase: " + phrase);
		float perplexity = detector.perplexity(phrase);
		if (perplexity > 100) {
		if (perplexity < 0) {
		phraseScores.put(phrase, 100f);
		} else {
		phraseScores.put(phrase, perplexity);
		@@ -50,6 +50,7 @@ public class Checker {
		System.out.println("\"phrases\": " + mapToJson(phraseScores));
		System.out.println("}");
		}

		private static String mapToJson(Map<String, Float> map) {
		StringBuilder jsonBuilder = new StringBuilder("{");
		for (Map.Entry<String, Float> entry : map.entrySet()) {
		@@ -60,6 +61,7 @@ public class Checker {

		return jsonBuilder.toString();
		}

		private static byte[] decompress(byte[] compressedData) {
		Inflater decompressor = new Inflater();
		decompressor.setInput(compressedData);
		@@ -93,6 +95,7 @@ public class Checker {
		return new TrieNode();
		}
		}

		public static void main(String[] args) {
		if (args.length > 1 && "--file".equals(args[0])) { // check syntax
		String path = args[1];

src/main/java/edu/bu/LanguageCorrection/MainApp.java

+84 −39

Original line number	Diff line number	Diff line
		package edu.bu.LanguageCorrection;

		import javax.sql.rowset.spi.SyncFactory;
		import javax.swing.*;
		import javax.swing.text.Highlighter;
		import javax.swing.text.Highlighter.Highlight;

		import dotterweide.editor.painter.HighlightPainter;

		import java.awt.*;
		// import java.awt.event.ActionEvent;
		import java.io.ByteArrayOutputStream;
		@@ -11,6 +14,12 @@ import java.io.PrintStream;
		import java.nio.file.Files;
		import java.nio.file.Paths;
		import java.util.List;
		import java.util.Map;
		import javax.swing.*;
		import javax.swing.text.*;
		import java.awt.Color;
		import java.awt.BorderLayout;
		import java.awt.event.ActionEvent;

		public class MainApp extends JFrame {
		private final JTextField urlField;
		@@ -116,16 +125,25 @@ public class MainApp extends JFrame {
		String content;
		if (isFile) {
		content = new String(Files.readAllBytes(Paths.get(text)));
		} else
		} else {
		content = text;
		}

		// Split the content into sentences so that each one can be analyzed separately below
		List<String> sentences = TextProcessor.extractSentences(content); // Use a method to split into sentences

		Checker checker = new Checker();
		checker.analyze(content); // This outputs void so we need to take the output from the console
		StringBuilder result = new StringBuilder();

		ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
		PrintStream printStream = new PrintStream(outputStream);
		PrintStream originalOut = System.out;

		String[] worstPhrases = new String[sentences.size()];

		for (String sentence : sentences) {
		System.setOut(printStream);
		checker.analyze(sentence); // Analyze each sentence separately

		// Reset System.out
		System.out.flush();
		@@ -133,41 +151,68 @@ public class MainApp extends JFrame {

		// Capture the output into a string
		String output = outputStream.toString();
		outputStream.reset(); // Clear the output stream for the next sentence
		//System.out.println(output);

		// Parsing the output to get sentences and phrases
		// Parsing the output to get phrases and their scores
		String[] lines = output.split("\n");
		String[] sentences = null;
		double lowestScore = Double.MAX_VALUE;
		String worstPhrase = null;

		for (String line : lines) {
		if (line.startsWith("{")) {
		continue;
		} else if (line.startsWith("sentences:")) {
		line = line.replace("sentences:", "").replaceAll("\"{", "").replaceAll("}", "");
		sentences = line.split(","); // List of sentences
		} else if (line.startsWith("phrases:")) {
		line = line.replace("phrases:", "").replaceAll("\"{", "").replaceAll("}", "");
		phrases = line.split(","); // List of phrases
		if (line.startsWith("\"phrases\":")) {
		//System.out.println(line);
		line = line.replace("\"phrases\":", "").replace("{", "").replace("}", "").trim();
		//System.out.println(line);
		String[] phrases = line.split(",");
		//System.out.println(line);
		for (String phrase : phrases) {
		double phraseScore = phrases;
		// Assuming highlight() method is defined elsewhere
		highlight(resultArea, phrase, phraseScore); // Pass the phrase and its score to highlight
		String[] parts = phrase.trim().split(":");
		double phraseScore = parts[1].trim().equals("null") ? 0
		: Double.parseDouble(parts[1].trim());
		if (phraseScore < lowestScore) {
		lowestScore = phraseScore;
		worstPhrase = parts[0].trim();
		worstPhrases[sentences.indexOf(sentence)] = worstPhrase;
		}
		}
		}
		} catch (Exception e) {
		resultArea.setText("Error: " + e.getMessage());
		}

		if (worstPhrase != null) {
		// Append to the result with annotations
		result.append("\nSentence: ").append(sentence)
		.append("\n>> Worst Phrase: ").append(worstPhrase)
		.append(" (Score: ").append(lowestScore).append(")\n");
		}
		}

		public void highlight(JTextArea textArea, String phrase, String pattern) {
		resultArea.setText(result.toString()); // Display the annotated results in the JTextArea

		// Highlight the worst phrase in each sentence
		Highlighter highlighter = resultArea.getHighlighter();
		Highlighter.HighlightPainter painter = new DefaultHighlighter.DefaultHighlightPainter(Color.YELLOW);
		for (String phrase : worstPhrases) {
		phrase = phrase.replaceAll("\"", "");
		//System.out.println(phrase);
		if (phrase != null) {
		//System.out.println(resultArea.getText());
		int start = resultArea.getText().indexOf(phrase);
		if (start != -1) {
		int end = start + phrase.length();
		try {
		Highlighter hilite = textArea.getHighlighter();
		int pos = 0;
		while ((pos = phrase.indexOf(pattern, pos)) >= 0) {
		// Create highlighter using private painter and apply around pattern
		hilite.addHighlight(pos, pos + pattern.length(), myHighlightPainter);
		pos += pattern.length();
		highlighter.addHighlight(start, end, painter);
		} catch (BadLocationException e) {
		e.printStackTrace();
		}
		} else {
		System.out.println("Text not found");
		}
		}
		}
		} catch (Exception e) {
		//throw new RuntimeException(e);
		resultArea.setText("Error: " + e.getMessage());
		}
		}

src/main/java/edu/bu/LanguageCorrection/crawler.java

+3 −0

Original line number	Diff line number	Diff line
		@@ -136,6 +136,9 @@ public class crawler {
		System.err.println("Unsupported language: " + language);
		return;
		}

		build_off_corpus = true;

		processPage(get_file_text(corpus));
		}