Commit a24227ab authored by Manuel Segimon
Browse files

add highlighting to runchecker

parent e13a10e2
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
This is unambiguous. This is word odd so choice. Sorry for the inconvenience.
Kenya, officially the Republic of Kenya
 No newline at end of file
Kenya, officially the Republic of Kenya.
 No newline at end of file
+6 −0
Original line number Diff line number Diff line
@@ -20,6 +20,12 @@
            <artifactId>jsoup</artifactId>
            <version>1.15.3</version>
        </dependency>
    
      <dependency>
        <groupId>de.sciss</groupId>
        <artifactId>dotterweide-ui_2.12</artifactId>
        <version>0.4.3</version>
      </dependency>
    </dependencies>

</project>
 No newline at end of file
+7 −4
Original line number Diff line number Diff line
package edu.bu.LanguageCorrection;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.Map;
import java.util.HashMap;
import java.util.ArrayList;
import java.util.zip.Inflater;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;


public class Checker {
    public void analyze(String text) {
        List<String> sentences = TextProcessor.extractSentences(text);
@@ -21,6 +20,7 @@ public class Checker {
        Map<String, Float> phraseScores = new HashMap<>();

        for (String sentence : sentences) {
            sentence = sentence.replaceAll("[^a-zA-Z0-9\\s]", "");
            // System.out.println("Analyzing sentence: " + sentence);
            List<String> phrases = TextProcessor.extractPhrases(sentence, 2, 3);

@@ -28,7 +28,7 @@ public class Checker {
            for (String phrase : phrases) {
                // System.out.println("Analyzing phrase: " + phrase);
                float perplexity = detector.perplexity(phrase);
                if (perplexity > 100) {
                if (perplexity < 0) {
                    phraseScores.put(phrase, 100f);
                } else {
                    phraseScores.put(phrase, perplexity);
@@ -50,6 +50,7 @@ public class Checker {
        System.out.println("\"phrases\": " + mapToJson(phraseScores));
        System.out.println("}");
    }

    private static String mapToJson(Map<String, Float> map) {
        StringBuilder jsonBuilder = new StringBuilder("{");
        for (Map.Entry<String, Float> entry : map.entrySet()) {
@@ -60,6 +61,7 @@ public class Checker {

        return jsonBuilder.toString();
    }

    private static byte[] decompress(byte[] compressedData) {
        Inflater decompressor = new Inflater();
        decompressor.setInput(compressedData);
@@ -93,6 +95,7 @@ public class Checker {
            return new TrieNode();
        }
    }

    public static void main(String[] args) {
        if (args.length > 1 && "--file".equals(args[0])) { // check syntax
            String path = args[1];
+84 −39
Original line number Diff line number Diff line
package edu.bu.LanguageCorrection;

import javax.sql.rowset.spi.SyncFactory;
import javax.swing.*;
import javax.swing.text.Highlighter;
import javax.swing.text.Highlighter.Highlight;

import dotterweide.editor.painter.HighlightPainter;

import java.awt.*;
// import java.awt.event.ActionEvent;
import java.io.ByteArrayOutputStream;
@@ -11,6 +14,12 @@ import java.io.PrintStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.Map;
import javax.swing.*;
import javax.swing.text.*;
import java.awt.Color;
import java.awt.BorderLayout;
import java.awt.event.ActionEvent;

public class MainApp extends JFrame {
    private final JTextField urlField;
@@ -116,16 +125,25 @@ public class MainApp extends JFrame {
            String content;
            if (isFile) {
                content = new String(Files.readAllBytes(Paths.get(text)));
            } else 
            } else {
                content = text;
            }

            // Assume content is already properly split into sentences here
            List<String> sentences = TextProcessor.extractSentences(content); // Use a method to split into sentences

            Checker checker = new Checker();
            checker.analyze(content); // This outputs void so we need to take the output from the console
            StringBuilder result = new StringBuilder();

            ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
            PrintStream printStream = new PrintStream(outputStream);
            PrintStream originalOut = System.out;

            String[] worstPhrases = new String[sentences.size()];

            for (String sentence : sentences) {
                System.setOut(printStream);
                checker.analyze(sentence); // Analyze each sentence separately

                // Reset System.out
                System.out.flush();
@@ -133,41 +151,68 @@ public class MainApp extends JFrame {

                // Capture the output into a string
                String output = outputStream.toString();
                outputStream.reset(); // Clear the output stream for the next sentence
                //System.out.println(output);

            // Parsing the output to get sentences and phrases
                // Parsing the output to get phrases and their scores
                String[] lines = output.split("\n");
            String[] sentences = null;
                double lowestScore = Double.MAX_VALUE;
                String worstPhrase = null;

                for (String line : lines) {
                if (line.startsWith("{")) {
                    continue;
                } else if (line.startsWith("sentences:")) {
                    line = line.replace("sentences:", "").replaceAll("\"{", "").replaceAll("}", "");
                    sentences = line.split(","); // List of sentences
                } else if (line.startsWith("phrases:")) {
                    line = line.replace("phrases:", "").replaceAll("\"{", "").replaceAll("}", "");
                    phrases = line.split(","); // List of phrases
                    if (line.startsWith("\"phrases\":")) {
                        //System.out.println(line);
                        line = line.replace("\"phrases\":", "").replace("{", "").replace("}", "").trim();
                        //System.out.println(line);
                        String[] phrases = line.split(",");
                        //System.out.println(line);
                        for (String phrase : phrases) {
                        double phraseScore = phrases;
                        // Assuming highlight() method is defined elsewhere
                        highlight(resultArea, phrase, phraseScore); // Pass the phrase and its score to highlight
                            String[] parts = phrase.trim().split(":");
                            double phraseScore = parts[1].trim().equals("null") ? 0
                                    : Double.parseDouble(parts[1].trim());
                            if (phraseScore < lowestScore) {
                                lowestScore = phraseScore;
                                worstPhrase = parts[0].trim();
                                worstPhrases[sentences.indexOf(sentence)] = worstPhrase;
                            }
                        }
                    }
        } catch (Exception e) {
            resultArea.setText("Error: " + e.getMessage());
                }

                if (worstPhrase != null) {
                    // Append to the result with annotations
                    result.append("\nSentence: ").append(sentence)
                            .append("\n>> Worst Phrase: ").append(worstPhrase)
                            .append(" (Score: ").append(lowestScore).append(")\n");
                }
            }

    public void highlight(JTextArea textArea, String phrase, String pattern) {
            resultArea.setText(result.toString()); // Display the annotated results in the JTextArea

            // Highlight the worst phrase in each sentence
            Highlighter highlighter = resultArea.getHighlighter();
            Highlighter.HighlightPainter painter = new DefaultHighlighter.DefaultHighlightPainter(Color.YELLOW);
            for (String phrase : worstPhrases) {
                phrase = phrase.replaceAll("\"", "");
                //System.out.println(phrase);
                if (phrase != null) {
                    //System.out.println(resultArea.getText());
                    int start = resultArea.getText().indexOf(phrase);
                    if (start != -1) {
                        int end = start + phrase.length();
                        try {
            Highlighter hilite = textArea.getHighlighter();
            int pos = 0;
            while ((pos = phrase.indexOf(pattern, pos)) >= 0) {
                // Create highlighter using private painter and apply around pattern
                hilite.addHighlight(pos, pos + pattern.length(), myHighlightPainter);
                pos += pattern.length();
                            highlighter.addHighlight(start, end, painter);
                        } catch (BadLocationException e) {
                            e.printStackTrace();
                        }
                    } else {
                        System.out.println("Text not found");
                    }
                }
            }
        } catch (Exception e) {
            //throw new RuntimeException(e);
            resultArea.setText("Error: " + e.getMessage());
        }
    }

+3 −0
Original line number Diff line number Diff line
@@ -136,6 +136,9 @@ public class crawler {
            System.err.println("Unsupported language: " + language);
            return;
        }

        build_off_corpus = true;

        processPage(get_file_text(corpus));
    }