Commit 1a0f2b7d authored by Moises Bensadon
Browse files

Issue #6 Checker. CLI and scoring implemented



Co-authored-by: Manuel Segimon <manuelsp@bu.edu>
parent 2a0e14fd
Loading
Loading
Loading
Loading
+65 −0
Original line number Diff line number Diff line
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Assigns heuristic anomaly scores to sentences and to the 2- and 3-word
 * phrases contained in them. Every score is capped at {@code MAX_SCORE}.
 *
 * <p>A score grows with (a) how far the text length is from a fixed expected
 * length and (b) how many of its words are absent from a small built-in list
 * of common words.
 */
public class AnomalyDetector {

    private static final Set<String> commonWords = new HashSet<>();
    private static final int AVERAGE_WORD_LENGTH = 5; // TODO: Change later

    /** Upper bound applied to every score before it is stored. */
    private static final int MAX_SCORE = 100;

    static { // simple list of common words for demonstration purposes
        commonWords.add("the");
        commonWords.add("be");
        commonWords.add("to");
        commonWords.add("of");
        commonWords.add("and"); // we can add more later
    }

    /**
     * Scores each sentence.
     *
     * <p>The score is the absolute difference between the sentence's character
     * length and {@code AVERAGE_WORD_LENGTH * 10}, plus 10 for every word that
     * is not in the common-word list, capped at 100.
     *
     * @param sentences sentences to score; identical sentences share one map entry
     * @return map from each sentence (unchanged, used as the key) to its score
     */
    public Map<String, Integer> analyzeSentences(List<String> sentences) {
        Map<String, Integer> sentenceScores = new HashMap<>();
        for (String sentence : sentences) {
            // length variance, measured against an assumed average sentence length
            int score = Math.abs(sentence.length() - AVERAGE_WORD_LENGTH * 10);

            // word rarity: 10 points per uncommon word
            score += 10 * countUncommonWords(sentence);

            sentenceScores.put(sentence, Math.min(score, MAX_SCORE));
        }
        return sentenceScores;
    }

    /**
     * Scores every 2- and 3-word phrase extracted from the given sentences.
     *
     * <p>A phrase gets 20 points when its character length is below
     * {@code AVERAGE_WORD_LENGTH} or above {@code AVERAGE_WORD_LENGTH * 3},
     * plus 5 for every word not in the common-word list, capped at 100.
     *
     * @param sentences sentences from which phrases are extracted
     * @return map from each distinct phrase to its score
     */
    public Map<String, Integer> analyzePhrases(List<String> sentences) {
        Map<String, Integer> phraseScores = new HashMap<>();

        for (String sentence : sentences) {
            List<String> phrases = TextProcessor.extractPhrases(sentence, 2, 3);

            for (String phrase : phrases) {
                int score = 0;
                // length variance: penalise phrases outside the expected length band
                if (phrase.length() < AVERAGE_WORD_LENGTH || phrase.length() > AVERAGE_WORD_LENGTH * 3) {
                    score += 20;
                }
                // word rarity: 5 points per uncommon word
                score += 5 * countUncommonWords(phrase);

                phraseScores.put(phrase, Math.min(score, MAX_SCORE));
            }
        }

        return phraseScores;
    }

    /**
     * Counts the whitespace-separated words of {@code text} that are not in the
     * common-word list.
     *
     * <p>Empty tokens are skipped: {@code split("\\s+")} yields a leading empty
     * string when the text starts with whitespace (and a single empty string
     * for empty text), and those must not be counted as uncommon words.
     */
    private static int countUncommonWords(String text) {
        int uncommon = 0;
        for (String word : text.split("\\s+")) {
            if (word.isEmpty()) {
                continue;
            }
            // Locale.ROOT keeps the lookup independent of the JVM's default locale.
            if (!commonWords.contains(word.toLowerCase(java.util.Locale.ROOT))) {
                uncommon++;
            }
        }
        return uncommon;
    }
}

Checker/CLI.java

0 → 100644
+25 −0
Original line number Diff line number Diff line
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Command-line entry point: reads a text file and hands its content to
 * {@link Checker#analyze(String)}.
 *
 * <p>Usage: {@code CLI --file <path>}
 */
public class CLI {
    /**
     * Runs the checker on the file named by the arguments.
     *
     * @param args expected to start with {@code --file} followed by a path;
     *             any further arguments are ignored
     */
    public static void main(String[] args) {
        // Require at least "--file <path>"
        if (args.length > 1 && "--file".equals(args[0])) {
            String path = args[1];
            try {
                // Read the entire file and decode it explicitly as UTF-8 so the
                // result does not depend on the platform's default charset.
                byte[] raw = Files.readAllBytes(Paths.get(path));
                String content = new String(raw, java.nio.charset.StandardCharsets.UTF_8);

                Checker checker = new Checker();
                checker.analyze(content);
            } catch (IOException | java.nio.file.InvalidPathException e) {
                // Covers missing/unreadable files as well as syntactically invalid
                // paths, which Paths.get reports with an unchecked exception.
                System.err.println("Error reading file: " + e.getMessage());
            }
        } else {
            System.out.println("Invalid arguments. Usage: CLI --file [path]");
        }
    }
}

Checker/Checker.java

0 → 100644
+18 −0
Original line number Diff line number Diff line
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.Map;

public class Checker {
    public void analyze(String text) {
        List<String> sentences = TextProcessor.extractSentences(text);
        AnomalyDetector detector = new AnomalyDetector();

        Map<String, Integer> sentenceScores = detector.analyzeSentences(sentences);
        Map<String, Integer> phraseScores = detector.analyzePhrases(sentences);

        // Output results
        System.out.println("{");
        System.out.println("\"sentences\": " + sentenceScores + ",");
        System.ou
 No newline at end of file
+32 −0
Original line number Diff line number Diff line
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Static helpers that split raw text into sentences and sentences into
 * word n-gram phrases.
 */
public class TextProcessor {
    /**
     * Splits text into sentences at every {@code '.'} character.
     *
     * <p>Each fragment is trimmed, and fragments that are empty after trimming
     * (for example the remainder after a trailing period, or the gap inside
     * {@code ".."}) are dropped so callers never receive blank sentences.
     *
     * @param text the text to split; {@code null} is treated as empty
     * @return a new mutable list of non-blank, trimmed sentences in input order
     */
    public static List<String> extractSentences(String text) {
        List<String> sentences = new ArrayList<>();
        if (text == null) {
            return sentences;
        }
        for (String fragment : text.split("\\.")) {
            String sentence = fragment.trim();
            if (!sentence.isEmpty()) {
                sentences.add(sentence);
            }
        }
        return sentences;
    }

    /**
     * Extracts the distinct word n-grams of a sentence for every n in
     * {@code [minN, maxN]}, joining the words of each phrase with one space.
     *
     * <p>The sentence is trimmed before splitting so leading whitespace does
     * not produce an empty first word. The range is clamped to
     * {@code [1, wordCount]}: sizes below 1 would only yield empty phrases and
     * sizes above the word count yield nothing.
     *
     * @param sentence the sentence to split on whitespace; {@code null} or
     *                 blank yields an empty list
     * @param minN     smallest phrase size in words
     * @param maxN     largest phrase size in words
     * @return a new mutable list of distinct phrases, in no particular order
     */
    public static List<String> extractPhrases(String sentence, int minN, int maxN) {
        // Using a Set to avoid duplicate phrases
        Set<String> phraseSet = new HashSet<>();

        String trimmed = sentence == null ? "" : sentence.trim();
        if (trimmed.isEmpty()) {
            return new ArrayList<>(phraseSet);
        }
        String[] words = trimmed.split("\\s+");

        // Clamping also prevents the counter from overflowing for a huge maxN.
        int smallest = Math.max(minN, 1);
        int largest = Math.min(maxN, words.length);

        for (int n = smallest; n <= largest; n++) {
            for (int start = 0; start + n <= words.length; start++) {
                StringBuilder phrase = new StringBuilder(words[start]);
                for (int k = start + 1; k < start + n; k++) {
                    phrase.append(' ').append(words[k]);
                }
                phraseSet.add(phrase.toString());
            }
        }

        // Convert the set back to a list to keep the List-returning interface
        return new ArrayList<>(phraseSet);
    }
}

Checker/test.txt

0 → 100644
+1 −0
Original line number Diff line number Diff line
This is strange so choice word.
 No newline at end of file