Loading src/main/java/edu/bu/LanguageCorrection/Corrector.java +43 −20 Original line number Diff line number Diff line Loading @@ -10,15 +10,22 @@ import java.util.List; import java.util.Map; import java.util.ArrayList; import java.util.PriorityQueue; import java.util.function.Consumer; import java.util.Collections; import java.util.HashMap; public class Corrector { private TrieNode detector; private static Consumer<String> outputCallback; public Corrector(String metadataPath) { detector = loadFile(metadataPath); outputCallback = System.out::println; } public void setCallback(Consumer<String> callback) { outputCallback = callback; } public TrieNode getDetector() { Loading Loading @@ -174,37 +181,52 @@ public class Corrector { } } private static void printSentencesInOrderOfChanges(String[] sentences, String originalSentence) { private static int longestCommonSubsequence(String[] originalWords, String[] shuffledWords) { int n = originalWords.length; int m = shuffledWords.length; int[][] dp = new int[n + 1][m + 1]; for (int i = 1; i <= n; i++) { for (int j = 1; j <= m; j++) { if (originalWords[i - 1].equals(shuffledWords[j - 1])) { dp[i][j] = dp[i - 1][j - 1] + 1; } else { dp[i][j] = Math.max(dp[i - 1][j], dp[i][j - 1]); } } } return dp[n][m]; } public void printSentencesInOrderOfChanges(String[] sentences, String originalSentence) { // Order the sentences by the number of changes needed Map<String, Integer> changesMap = new HashMap<>(); String[] originalWords = originalSentence.split(" "); for (String sentence : sentences) { if (sentence == null || sentence.isEmpty()) continue; // Skip empty sentences (not in the 0.5 percentile of the best sentence) if (sentence == null || sentence.isEmpty()) { continue; } String sentencePart = sentence.split(" | Score: ")[0]; // Remove score (if present String sentencePart = sentence.split(" \\| Score: ")[0]; // Remove score, if present String[] shuffledWords = sentencePart.split(" "); int changes = 0; if (sentencePart.length() != originalSentence.length()) { changes = Math.abs(sentencePart.split(" ").length - originalSentence.split(" ").length) + 1; } else { String[] originalWords = originalSentence.split(" "); String[] correctedWords = sentencePart.split(" "); for (int i = 0; i < originalWords.length; i++) { if (!originalWords[i].equals(correctedWords[i])) { changes++; } } } // Calculate the longest common subsequence int lcsLength = longestCommonSubsequence(originalWords, shuffledWords); // Number of changes needed is the difference in length minus LCS length int changes = (originalWords.length - lcsLength) + (shuffledWords.length - lcsLength); changesMap.put(sentence, changes); } // Sort by number of changes in ascending order List<Map.Entry<String, Integer>> sortedList = new ArrayList<>(changesMap.entrySet()); sortedList.sort(Map.Entry.comparingByValue()); for (Map.Entry<String, Integer> entry : sortedList) { System.out.println(">> " + entry.getKey() + " | Changes: " + entry.getValue()); outputCallback.accept(">> " + entry.getKey() + " | Changes: " + entry.getValue()+"\n"); } } Loading @@ -215,16 +237,17 @@ public class Corrector { try { String content = new String(Files.readAllBytes(Paths.get(path))); Corrector corrector = new Corrector(metadataPath); // Run corrector corrector.setCallback(System.out::println); String[] sentences = TextProcessor.extractSentences(content).toArray(new String[0]); for (String sentence : sentences) { sentence = sentence.replaceAll("[^a-zA-Z0-9\\s]", ""); String[] corrected = corrector.correct(sentence); if (corrected.length == 0) { System.out.println(sentence + " | No corrections needed."); outputCallback.accept(sentence + " | No corrections needed.\n"); continue; } System.out.println(sentence + " | Corrected Sentence Suggestions:"); printSentencesInOrderOfChanges(corrected, sentence); outputCallback.accept(sentence + " | Corrected Sentence Suggestions:"); corrector.printSentencesInOrderOfChanges(corrected, sentence); } } catch (IOException e) { System.err.println("Error reading file: " + e.getMessage()); Loading Loading
src/main/java/edu/bu/LanguageCorrection/Corrector.java +43 −20 Original line number Diff line number Diff line Loading @@ -10,15 +10,22 @@ import java.util.List; import java.util.Map; import java.util.ArrayList; import java.util.PriorityQueue; import java.util.function.Consumer; import java.util.Collections; import java.util.HashMap; public class Corrector { private TrieNode detector; private static Consumer<String> outputCallback; public Corrector(String metadataPath) { detector = loadFile(metadataPath); outputCallback = System.out::println; } public void setCallback(Consumer<String> callback) { outputCallback = callback; } public TrieNode getDetector() { Loading Loading @@ -174,37 +181,52 @@ public class Corrector { } } private static void printSentencesInOrderOfChanges(String[] sentences, String originalSentence) { private static int longestCommonSubsequence(String[] originalWords, String[] shuffledWords) { int n = originalWords.length; int m = shuffledWords.length; int[][] dp = new int[n + 1][m + 1]; for (int i = 1; i <= n; i++) { for (int j = 1; j <= m; j++) { if (originalWords[i - 1].equals(shuffledWords[j - 1])) { dp[i][j] = dp[i - 1][j - 1] + 1; } else { dp[i][j] = Math.max(dp[i - 1][j], dp[i][j - 1]); } } } return dp[n][m]; } public void printSentencesInOrderOfChanges(String[] sentences, String originalSentence) { // Order the sentences by the number of changes needed Map<String, Integer> changesMap = new HashMap<>(); String[] originalWords = originalSentence.split(" "); for (String sentence : sentences) { if (sentence == null || sentence.isEmpty()) continue; // Skip empty sentences (not in the 0.5 percentile of the best sentence) if (sentence == null || sentence.isEmpty()) { continue; } String sentencePart = sentence.split(" | Score: ")[0]; // Remove score (if present String sentencePart = sentence.split(" \\| Score: ")[0]; // Remove score, if present String[] shuffledWords = sentencePart.split(" "); int changes = 0; if (sentencePart.length() != originalSentence.length()) { changes = Math.abs(sentencePart.split(" ").length - originalSentence.split(" ").length) + 1; } else { String[] originalWords = originalSentence.split(" "); String[] correctedWords = sentencePart.split(" "); for (int i = 0; i < originalWords.length; i++) { if (!originalWords[i].equals(correctedWords[i])) { changes++; } } } // Calculate the longest common subsequence int lcsLength = longestCommonSubsequence(originalWords, shuffledWords); // Number of changes needed is the difference in length minus LCS length int changes = (originalWords.length - lcsLength) + (shuffledWords.length - lcsLength); changesMap.put(sentence, changes); } // Sort by number of changes in ascending order List<Map.Entry<String, Integer>> sortedList = new ArrayList<>(changesMap.entrySet()); sortedList.sort(Map.Entry.comparingByValue()); for (Map.Entry<String, Integer> entry : sortedList) { System.out.println(">> " + entry.getKey() + " | Changes: " + entry.getValue()); outputCallback.accept(">> " + entry.getKey() + " | Changes: " + entry.getValue()+"\n"); } } Loading @@ -215,16 +237,17 @@ public class Corrector { try { String content = new String(Files.readAllBytes(Paths.get(path))); Corrector corrector = new Corrector(metadataPath); // Run corrector corrector.setCallback(System.out::println); String[] sentences = TextProcessor.extractSentences(content).toArray(new String[0]); for (String sentence : sentences) { sentence = sentence.replaceAll("[^a-zA-Z0-9\\s]", ""); String[] corrected = corrector.correct(sentence); if (corrected.length == 0) { System.out.println(sentence + " | No corrections needed."); outputCallback.accept(sentence + " | No corrections needed.\n"); continue; } System.out.println(sentence + " | Corrected Sentence Suggestions:"); printSentencesInOrderOfChanges(corrected, sentence); outputCallback.accept(sentence + " | Corrected Sentence Suggestions:"); corrector.printSentencesInOrderOfChanges(corrected, sentence); } } catch (IOException e) { System.err.println("Error reading file: " + e.getMessage()); Loading