Loading CheckerCorrector/DBinterface/DBinterface.java +36 −24 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ import java.sql.*; import StateMachine.*; import TypoCorrector.FilePrefixComparator; import TypoCorrector.TypoCorrector; import TypoCorrector.WordCountSorter; import util.TwoListStruct; import util.StringFileWriter; import util.StringProcessor; Loading Loading @@ -236,15 +237,17 @@ public class DBinterface { if (resultSet.next()) { word = resultSet.getString("word"); ////System.out.println("Here I am: "+ word); if(i+biasToken-1>0){ if(i+biasToken>0){ String nextToken = word; String queryy = "SELECT role FROM word_roles WHERE word = '" + nextToken + "';"; ResultSet resultSett = statement.executeQuery(queryy); if (resultSett.next()) { String roleNext = resultSett.getString("role"); wordPairDatabase.bfsAndGetWords(tokenList.get(i+biasToken-1), 2); FilePrefixComparator findSimilarity = FilePrefixComparator.of("similarity_words.txt"); String tokenCorrected = findSimilarity.findBestMatchingPrefix(nextToken); wordPairDatabase.bfsAndWriteCountsToFile(tokenList.get(i+biasToken-1), 2); WordCountSorter wordCountSorter = WordCountSorter.of("similarity_words.txt"); List<String> possibleWords = wordCountSorter.getSortedWordsByCount(); //System.out.println(possibleWords); for(String tokenCorrected: possibleWords){ if(!nextToken.equals(tokenCorrected)){ String queryNext = "SELECT role FROM word_roles WHERE word = '" + tokenCorrected + "';"; ResultSet resultSetNext = statement.executeQuery(queryNext); Loading @@ -252,6 +255,8 @@ public class DBinterface { String roleNextNext = resultSett.getString("role"); if(roleNextNext.equals(roleNext)){ word = tokenCorrected; break; } } } } Loading @@ -272,7 +277,7 @@ public class DBinterface { }else{ if(flagsCorrection.isEmpty()){ sfw.appendString("(INSERTION INTO INDEX): "+ i + " -> "+ word + "*"); sfw.appendString("(INSERTION INTO THE END) -> "+ word + "*"); tokenList.add(word); }else 
if(flagsCorrection.get(flagsCorrectioncnt)){ tokenList.add(word); Loading Loading @@ -306,15 +311,17 @@ public class DBinterface { if (resultSet.next()) { word = resultSet.getString("word"); ////System.out.println("Here I am: "+ word); if(i+biasToken-1>0){ if(i+biasToken>0){ String nextToken = word; String queryy = "SELECT role FROM word_roles WHERE word = '" + nextToken + "';"; ResultSet resultSett = statement.executeQuery(queryy); if (resultSett.next()) { String roleNext = resultSett.getString("role"); wordPairDatabase.bfsAndGetWords(tokenList.get(i+biasToken-1), 2); FilePrefixComparator findSimilarity = FilePrefixComparator.of("similarity_words.txt"); String tokenCorrected = findSimilarity.findBestMatchingPrefix(nextToken); wordPairDatabase.bfsAndWriteCountsToFile(tokenList.get(i+biasToken-1), 2); WordCountSorter wordCountSorter = WordCountSorter.of("similarity_words.txt"); List<String> possibleWords = wordCountSorter.getSortedWordsByCount(); // System.out.println(possibleWords); for(String tokenCorrected: possibleWords){ if(!nextToken.equals(tokenCorrected)){ String queryNext = "SELECT role FROM word_roles WHERE word = '" + tokenCorrected + "';"; ResultSet resultSetNext = statement.executeQuery(queryNext); Loading @@ -322,16 +329,20 @@ public class DBinterface { String roleNextNext = resultSett.getString("role"); if(roleNextNext.equals(roleNext)){ word = tokenCorrected; break; } } } } } } if(flagsCorrection.isEmpty()){ sfw.appendString("(INSERTION INTO THE END) -> "+ word + "*"); if(i<tokenList.size()){ sfw.appendString("(INSERTION INTO INDEX): "+ i + " -> "+ word + "*"); tokenList.add(i+biasToken,word); }else{ sfw.appendString("(INSERTION INTO THE END) -> "+ word + "*"); tokenList.add(word); } }else if(flagsCorrection.get(flagsCorrectioncnt)){ Loading @@ -339,6 +350,7 @@ public class DBinterface { // System.out.println("here!"); tokenList.add(i+biasToken,word); }else{ //sfw.appendString("(INSERTION INTO THE END) -> "+ word + "*"); tokenList.add(word); } }else{ 
Loading CheckerCorrector/SQLite/token_database_english.db (44 KiB) File changed.No diff preview for this file type. View original file View changed file CheckerCorrector/SQLite/word_similarity_english.db (52 KiB) File changed.No diff preview for this file type. View original file View changed file CheckerCorrector/SimilarityCorrector/WordPairDatabase.java +53 −0 Original line number Diff line number Diff line Loading @@ -41,6 +41,7 @@ public class WordPairDatabase { while ((line = reader.readLine()) != null) { line = line.toLowerCase(); line = StringProcessor.handleApostrophe(line); line = line.replaceAll("\\p{Punct}", " $0"); String[] words = line.split("\\s+"); for (int i = 0; i < words.length - 1; i++) { String word1 = words[i]; Loading Loading @@ -133,6 +134,58 @@ public class WordPairDatabase { return nextWords; } public void bfsAndWriteCountsToFile(String startWord, int depth) { String outputFile = "similarity_words.txt"; Map<String, Integer> wordCounts = new HashMap<>(); Queue<String> queue = new ArrayDeque<>(); queue.add(startWord); try (Connection conn = DriverManager.getConnection(url)) { while (!queue.isEmpty() && depth > 0) { int size = queue.size(); for (int i = 0; i < size; i++) { String word = queue.poll(); for (Map.Entry<String, Integer> entry : getCountsForWord(conn, word).entrySet()) { //String pair = word + " " + entry.getKey(); wordCounts.put(entry.getKey(), entry.getValue()); queue.add(entry.getKey()); // Add next word to queue for BFS } } depth--; } } catch (SQLException e) { System.err.println("Error accessing database: " + e.getMessage()); } // Write word counts to the output file //System.out.print("WORDCOUNTS: "); //System.out.println(wordCounts); try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile))) { for (Map.Entry<String, Integer> entry : wordCounts.entrySet()) { writer.write(entry.getKey() + " " + entry.getValue() + "\n"); } } catch (IOException e) { System.err.println("Error writing to file: " + e.getMessage()); 
} } private Map<String, Integer> getCountsForWord(Connection conn, String word) { Map<String, Integer> counts = new HashMap<>(); String sql = "SELECT word2, count FROM word_pairs WHERE word1 = ?"; try (PreparedStatement stmt = conn.prepareStatement(sql)) { stmt.setString(1, word); ResultSet rs = stmt.executeQuery(); while (rs.next()) { counts.put(rs.getString("word2"), rs.getInt("count")); } } catch (SQLException e) { System.err.println("Error retrieving counts for word: " + e.getMessage()); } return counts; } public static void main(String[] args) { String url = "word_pairs.db"; String fileName = "sentences.txt"; Loading CheckerCorrector/TypoCorrector/WordCountSorter.java 0 → 100644 +47 −0 Original line number Diff line number Diff line package TypoCorrector; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.util.*; public class WordCountSorter { private String filename; private WordCountSorter(String filename) { this.filename = filename; } public static WordCountSorter of(String filename){ return new WordCountSorter(filename); } public List<String> getSortedWordsByCount() { Map<String, Integer> wordCountMap = new HashMap<>(); try (BufferedReader reader = new BufferedReader(new FileReader(filename))) { String line; while ((line = reader.readLine()) != null) { String[] parts = line.split("\\s+"); if (parts.length == 2) { String word = parts[0]; int count = Integer.parseInt(parts[1]); wordCountMap.put(word, count); } } } catch (IOException e) { System.err.println("Error reading file: " + e.getMessage()); } // Sort the map by value (count) in descending order List<Map.Entry<String, Integer>> sortedEntries = new ArrayList<>(wordCountMap.entrySet()); sortedEntries.sort((e1, e2) -> e2.getValue().compareTo(e1.getValue())); // Extract sorted words from sorted entries List<String> sortedWords = new ArrayList<>(); for (Map.Entry<String, Integer> entry : sortedEntries) { sortedWords.add(entry.getKey()); } return sortedWords; } 
} Loading
CheckerCorrector/DBinterface/DBinterface.java +36 −24 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ import java.sql.*; import StateMachine.*; import TypoCorrector.FilePrefixComparator; import TypoCorrector.TypoCorrector; import TypoCorrector.WordCountSorter; import util.TwoListStruct; import util.StringFileWriter; import util.StringProcessor; Loading Loading @@ -236,15 +237,17 @@ public class DBinterface { if (resultSet.next()) { word = resultSet.getString("word"); ////System.out.println("Here I am: "+ word); if(i+biasToken-1>0){ if(i+biasToken>0){ String nextToken = word; String queryy = "SELECT role FROM word_roles WHERE word = '" + nextToken + "';"; ResultSet resultSett = statement.executeQuery(queryy); if (resultSett.next()) { String roleNext = resultSett.getString("role"); wordPairDatabase.bfsAndGetWords(tokenList.get(i+biasToken-1), 2); FilePrefixComparator findSimilarity = FilePrefixComparator.of("similarity_words.txt"); String tokenCorrected = findSimilarity.findBestMatchingPrefix(nextToken); wordPairDatabase.bfsAndWriteCountsToFile(tokenList.get(i+biasToken-1), 2); WordCountSorter wordCountSorter = WordCountSorter.of("similarity_words.txt"); List<String> possibleWords = wordCountSorter.getSortedWordsByCount(); //System.out.println(possibleWords); for(String tokenCorrected: possibleWords){ if(!nextToken.equals(tokenCorrected)){ String queryNext = "SELECT role FROM word_roles WHERE word = '" + tokenCorrected + "';"; ResultSet resultSetNext = statement.executeQuery(queryNext); Loading @@ -252,6 +255,8 @@ public class DBinterface { String roleNextNext = resultSett.getString("role"); if(roleNextNext.equals(roleNext)){ word = tokenCorrected; break; } } } } Loading @@ -272,7 +277,7 @@ public class DBinterface { }else{ if(flagsCorrection.isEmpty()){ sfw.appendString("(INSERTION INTO INDEX): "+ i + " -> "+ word + "*"); sfw.appendString("(INSERTION INTO THE END) -> "+ word + "*"); tokenList.add(word); }else 
if(flagsCorrection.get(flagsCorrectioncnt)){ tokenList.add(word); Loading Loading @@ -306,15 +311,17 @@ public class DBinterface { if (resultSet.next()) { word = resultSet.getString("word"); ////System.out.println("Here I am: "+ word); if(i+biasToken-1>0){ if(i+biasToken>0){ String nextToken = word; String queryy = "SELECT role FROM word_roles WHERE word = '" + nextToken + "';"; ResultSet resultSett = statement.executeQuery(queryy); if (resultSett.next()) { String roleNext = resultSett.getString("role"); wordPairDatabase.bfsAndGetWords(tokenList.get(i+biasToken-1), 2); FilePrefixComparator findSimilarity = FilePrefixComparator.of("similarity_words.txt"); String tokenCorrected = findSimilarity.findBestMatchingPrefix(nextToken); wordPairDatabase.bfsAndWriteCountsToFile(tokenList.get(i+biasToken-1), 2); WordCountSorter wordCountSorter = WordCountSorter.of("similarity_words.txt"); List<String> possibleWords = wordCountSorter.getSortedWordsByCount(); // System.out.println(possibleWords); for(String tokenCorrected: possibleWords){ if(!nextToken.equals(tokenCorrected)){ String queryNext = "SELECT role FROM word_roles WHERE word = '" + tokenCorrected + "';"; ResultSet resultSetNext = statement.executeQuery(queryNext); Loading @@ -322,16 +329,20 @@ public class DBinterface { String roleNextNext = resultSett.getString("role"); if(roleNextNext.equals(roleNext)){ word = tokenCorrected; break; } } } } } } if(flagsCorrection.isEmpty()){ sfw.appendString("(INSERTION INTO THE END) -> "+ word + "*"); if(i<tokenList.size()){ sfw.appendString("(INSERTION INTO INDEX): "+ i + " -> "+ word + "*"); tokenList.add(i+biasToken,word); }else{ sfw.appendString("(INSERTION INTO THE END) -> "+ word + "*"); tokenList.add(word); } }else if(flagsCorrection.get(flagsCorrectioncnt)){ Loading @@ -339,6 +350,7 @@ public class DBinterface { // System.out.println("here!"); tokenList.add(i+biasToken,word); }else{ //sfw.appendString("(INSERTION INTO THE END) -> "+ word + "*"); tokenList.add(word); } }else{ 
Loading
CheckerCorrector/SQLite/token_database_english.db (44 KiB) File changed. No diff preview for this file type. View original file View changed file
CheckerCorrector/SQLite/word_similarity_english.db (52 KiB) File changed. No diff preview for this file type. View original file View changed file
CheckerCorrector/SimilarityCorrector/WordPairDatabase.java +53 −0 Original line number Diff line number Diff line Loading @@ -41,6 +41,7 @@ public class WordPairDatabase { while ((line = reader.readLine()) != null) { line = line.toLowerCase(); line = StringProcessor.handleApostrophe(line); line = line.replaceAll("\\p{Punct}", " $0"); String[] words = line.split("\\s+"); for (int i = 0; i < words.length - 1; i++) { String word1 = words[i]; Loading Loading @@ -133,6 +134,58 @@ public class WordPairDatabase { return nextWords; } public void bfsAndWriteCountsToFile(String startWord, int depth) { String outputFile = "similarity_words.txt"; Map<String, Integer> wordCounts = new HashMap<>(); Queue<String> queue = new ArrayDeque<>(); queue.add(startWord); try (Connection conn = DriverManager.getConnection(url)) { while (!queue.isEmpty() && depth > 0) { int size = queue.size(); for (int i = 0; i < size; i++) { String word = queue.poll(); for (Map.Entry<String, Integer> entry : getCountsForWord(conn, word).entrySet()) { //String pair = word + " " + entry.getKey(); wordCounts.put(entry.getKey(), entry.getValue()); queue.add(entry.getKey()); // Add next word to queue for BFS } } depth--; } } catch (SQLException e) { System.err.println("Error accessing database: " + e.getMessage()); } // Write word counts to the output file //System.out.print("WORDCOUNTS: "); //System.out.println(wordCounts); try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile))) { for (Map.Entry<String, Integer> entry : wordCounts.entrySet()) { writer.write(entry.getKey() + " " + entry.getValue() + "\n"); } } catch (IOException e) { System.err.println("Error writing to file: " + e.getMessage()); } } private Map<String, Integer> getCountsForWord(Connection conn, String word) { Map<String, Integer> counts = new HashMap<>(); String sql = "SELECT word2, count FROM word_pairs WHERE word1 = ?"; try (PreparedStatement stmt = conn.prepareStatement(sql)) { stmt.setString(1, word); ResultSet rs = 
stmt.executeQuery(); while (rs.next()) { counts.put(rs.getString("word2"), rs.getInt("count")); } } catch (SQLException e) { System.err.println("Error retrieving counts for word: " + e.getMessage()); } return counts; } public static void main(String[] args) { String url = "word_pairs.db"; String fileName = "sentences.txt"; Loading
// Source file: CheckerCorrector/TypoCorrector/WordCountSorter.java (package TypoCorrector)
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;

/**
 * Ranks the words listed in a {@code "<word> <count>"} text file by count.
 *
 * <p>Each useful line of the file holds exactly two whitespace-separated
 * fields: a word and an integer count. Instances are created through
 * {@link #of(String)}.
 */
public class WordCountSorter {
    private final String filename;

    private WordCountSorter(String filename) {
        this.filename = Objects.requireNonNull(filename, "filename");
    }

    /**
     * Creates a sorter bound to the given file.
     *
     * @param filename path of the word-count file to read
     * @return a sorter that reads {@code filename} on each call to
     *         {@link #getSortedWordsByCount()}
     * @throws NullPointerException if {@code filename} is {@code null}
     */
    public static WordCountSorter of(String filename) {
        return new WordCountSorter(filename);
    }

    /**
     * Reads the file and returns its words ordered by descending count.
     *
     * <p>Lines that do not consist of exactly two fields are ignored. A line
     * whose second field is not a valid {@code int} is skipped with a message
     * on {@code System.err} instead of aborting the whole read. When a word
     * occurs more than once, its last count wins. Words with equal counts keep
     * the order in which they first appear in the file. If the file cannot be
     * read, the error is reported on {@code System.err} and the words collected
     * so far (possibly none) are returned.
     *
     * @return the words sorted by count, highest first; never {@code null}
     */
    public List<String> getSortedWordsByCount() {
        // Insertion-ordered map + stable sort => deterministic order for ties.
        Map<String, Integer> wordCountMap = new LinkedHashMap<>();

        // FileReader (platform default charset) is kept on purpose: the file is
        // produced with a plain FileWriter, so both sides must agree.
        try (BufferedReader reader = new BufferedReader(new FileReader(filename))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // trim() first so leading whitespace does not create an empty field
                String[] parts = line.trim().split("\\s+");
                if (parts.length != 2) {
                    continue;
                }
                try {
                    wordCountMap.put(parts[0], Integer.parseInt(parts[1]));
                } catch (NumberFormatException e) {
                    System.err.println("Skipping line with invalid count: " + line);
                }
            }
        } catch (IOException e) {
            System.err.println("Error reading file: " + e.getMessage());
        }

        // Sort the entries by value (count) in descending order.
        List<Map.Entry<String, Integer>> sortedEntries = new ArrayList<>(wordCountMap.entrySet());
        sortedEntries.sort(Map.Entry.<String, Integer>comparingByValue(Comparator.reverseOrder()));

        // Extract the words from the sorted entries.
        List<String> sortedWords = new ArrayList<>(sortedEntries.size());
        for (Map.Entry<String, Integer> entry : sortedEntries) {
            sortedWords.add(entry.getKey());
        }
        return sortedWords;
    }
}