Commit 605ed3aa authored by Manuel Segimon
Browse files

work in progress

parent 7058b39c
Loading
Loading
Loading
Loading
+2 −6
Original line number Diff line number Diff line
@@ -28,7 +28,7 @@ public class Checker {
            for (String phrase : phrases) {
                // System.out.println("Analyzing phrase: " + phrase);
                float perplexity = detector.perplexity(phrase);
                if (perplexity < 0) {
                if (perplexity > 100.0) {
                    phraseScores.put(phrase, 100f);
                } else {
                    phraseScores.put(phrase, perplexity);
@@ -36,11 +36,7 @@ public class Checker {
            }

            // Calculate average perplexity for the sentence
            float sentenceScore = 0;
            for (String phrase : phrases) {
                sentenceScore += phraseScores.get(phrase);
            }
            sentenceScore /= phrases.size();
            float sentenceScore = detector.perplexity(sentence);
            sentenceScores.put(sentence, sentenceScore);
        }

+1 −1
Original line number Diff line number Diff line
@@ -360,7 +360,7 @@ public class MainApp extends JFrame {
            if (changeMade) {
                byte[] serialized = node.serialize();
                byte[] compressed = compress(serialized);
                writeToFile(compressed, "metadata.ser");
                writeToFile(compressed, languageFile);
            }
        } catch (Exception e) {
            resultArea.setText("Error: " + e.getMessage());
+22 −13
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@ package edu.bu.LanguageCorrection;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import java.util.List;

public class TrieNode implements Serializable, Cloneable {
    HashMap<String, TrieNode> children = new HashMap<>();
@@ -93,7 +94,11 @@ public class TrieNode implements Serializable, Cloneable {
        return res;
    }

    public float perplexity(String phrase) {
    public float perplexity(String sentence) {
        List<String> phrases = TextProcessor.extractPhrases(sentence, 2, 3);
        float perplexity = 0;

        for (String phrase : phrases) {
            float logProb = 0;
            String[] words = phrase.split(" ");
            String currentPhrase = "";
@@ -105,9 +110,13 @@ public class TrieNode implements Serializable, Cloneable {
                }
                logProb += Math.log((probability(currentPhrase)));
            }
        float perplexity = (float) Math.pow(2, -logProb);
        //System.out.println("Perplexity of phrase (" + phrase + ") : " + ((float) perplexity / factorial(words.length)));
        return (float) perplexity / factorial(words.length);
            perplexity += (float) Math.pow(2, -logProb);
            // System.out.println("Perplexity of phrase (" + phrase + ") : " + ((float)
            // perplexity / factorial(words.length)));
        }

        
        return (float) perplexity / phrases.size();
    }
    
    public byte[] serialize() {