Loading CheckerCorrector/Checker.java +19 −1 Original line number Diff line number Diff line Loading @@ -21,7 +21,25 @@ public class Checker { DBinterface dbInterface = new DBinterface(); DirectedGraph<State> graph = basicGraphClass.getGraph(); JsonMaker jsonMaker = JsonMaker.create(); if(argPars.isUpdateHashTable()){ if(argPars.isUpdateToken()){ if(argPars.isCheckFile()){ SentenceExtractor extractor = SentenceExtractor.of(argPars.getFileName()); List<String> extractedSentences = extractor.getSentences(); for (String sentence : extractedSentences) { dbInterface.updateTokenInDatabase(sentence.toLowerCase(), graph); PhraseExtractor extractorPhrase = PhraseExtractor.fromSentence(sentence, 3, 5); List<String> phrases = extractorPhrase.getPhrases(); for (String phrase : phrases) { dbInterface.updateTokenInDatabase(phrase.toLowerCase(), graph); } } }else if(argPars.isCheckSentence()){ dbInterface.updateTokenInDatabase(argPars.getSentence().toLowerCase(), graph); for (String phrase : PhraseExtractor.fromSentence(argPars.getSentence(),3, 5).getPhrases()) { //dbInterface.updateTokenInDatabase(phrase.toLowerCase(), graph); } } }else if(argPars.isUpdateHashTable()){ if(argPars.isCheckFile()){ SentenceExtractor extractor = SentenceExtractor.of(argPars.getFileName()); List<String> extractedSentences = extractor.getSentences(); Loading CheckerCorrector/DBinterface/DBinterface.java +117 −0 Original line number Diff line number Diff line Loading @@ -215,4 +215,121 @@ public class DBinterface { } return new String(); } public void updateTokenInDatabase(String sentence, DirectedGraph<State> graph){ StateMachine SM = new StateMachine(); sentence = sentence.replaceAll("\\p{Punct}", " $0"); String[] tokens = sentence.split("\\s+"); String[] tokensCopy = tokens.clone(); List<Boolean> missFlag = new ArrayList(); List<String> tokenList = new ArrayList<>(Arrays.asList(tokensCopy)); String url = "jdbc:sqlite:./SQLite/mydatabase.db"; String urlinsert = "jdbc:sqlite:./SQLite/newdatabase.db"; 
String dicFileName = "./SQLite/smallDic.txt"; TypoCorrector typoChecker = TypoCorrector.of(dicFileName); int initialConf = 0; int cntMiss = 0; try (Connection connection = DriverManager.getConnection(url)) { // Lookup each token in the database and categorize it for (int i = 0; i < tokens.length; i++) { String token = tokens[i]; System.out.print("\nBefor token: "+tokens[i]+"| "); try (Statement statement = connection.createStatement()) { String query = "SELECT role FROM word_roles WHERE word = '" + token + "';"; String role = new String(); ResultSet resultSet = statement.executeQuery(query); if (resultSet.next()) { role = resultSet.getString("role"); //////System.out.print("first try: " + token + " -> " + role); System.out.println("role: " + role); if(!role.equals("NAN")){ tokens[i] = role; missFlag.add(false); }else{ missFlag.add(true); cntMiss ++; } }else{ String tokenCorrected = new String(); if(role.isEmpty()){ tokenCorrected = typoChecker.closestWord(token); if(!tokenCorrected.equals(token)) initialConf += 5; // ////System.out.print("Corrected token: " + token + " -> " + tokenCorrected); query = "SELECT role FROM word_roles WHERE word = '" + tokenCorrected + "';"; // Replace the token with its role resultSet = statement.executeQuery(query); if (resultSet.next()) { tokenList.set(i,tokenCorrected); role = resultSet.getString("role"); // ////System.out.print("| Second try: "+ token + " -> " + role); System.out.println("role: " + role); if(!role.equals("NAN")){ tokens[i] = role; missFlag.add(false); }else{ missFlag.add(true); cntMiss ++; } }else{ missFlag.add(true); cntMiss ++; } } } } System.out.print("After token: "+tokens[i]+"| "); //////System.out.println(); } System.out.println("\nMISS: "+cntMiss); if(cntMiss<3 && cntMiss>0){ List<State> actions = new ArrayList<>(); for(String token: tokens){ actions.add(State.fromString(token)); } // Define the initial state State initialState = State.START; // Check if the sequence of actions follows the state machine 
List<State> suggested = SM.updateDB(graph, actions, initialState); System.out.println(actions); System.out.println(suggested); System.out.println("---------------------------"); int cntUp=0; for(int i=0; i<suggested.size(); i++){ if(!suggested.get(i).toString().equals(tokens[i])) cntUp++; } System.out.println(missFlag); if(cntUp<3){ for(int i=0; i<tokens.length; i++){ if(missFlag.get(i)){ if(!suggested.get(i).toString().equals(tokens[i])){ System.out.println(tokens[i] + "| " + suggested.get(i)); String sql = "INSERT INTO word_roles VALUES(?, ?);"; try (Connection conn = DriverManager.getConnection(urlinsert); PreparedStatement pstmt = conn.prepareStatement(sql)) { // Insert the first record pstmt.setString(1, tokens[i]); pstmt.setString(2, suggested.get(i).toString()); pstmt.executeUpdate(); } catch (SQLException e) { System.out.println(e.getMessage()); } } } } } } } catch (SQLException e) { e.printStackTrace(); } } } CheckerCorrector/DirectedGraph/BasicGraph.java +2 −0 Original line number Diff line number Diff line Loading @@ -53,6 +53,7 @@ public class BasicGraph { graph.addEdge(State.NOT, State.ADVERB); graph.addEdge(State.NOT, State.ADJECTIVE); graph.addEdge(State.NOT, State.ARTICLE); graph.addEdge(State.NOT, State.DOT); graph.addEdge(State.OF, State.NOUN); graph.addEdge(State.NOUN, State.OF); graph.addEdge(State.NOUN, State.IS); Loading @@ -60,6 +61,7 @@ public class BasicGraph { graph.addEdge(State.IS, State.ADJECTIVE); graph.addEdge(State.IS, State.ADVERB); graph.addEdge(State.IS, State.ARTICLE); graph.addEdge(State.IS, State.NOT); graph.addEdge(State.THAT, State.IF); Loading CheckerCorrector/DirectedGraph/DFS.java +5 −5 Original line number Diff line number Diff line Loading @@ -19,13 +19,13 @@ public class DFS { return new DFS(); } public Set<String> dfs(DirectedGraph<State> graph, State currentState, int maxDepth){ public Set<String> dfs(DirectedGraph<State> graph, State currentState, int maxDepth, int minDepth){ List<State> path = new ArrayList<>(); 
path.add(currentState); dfsRecurssion(graph, currentState, 0, maxDepth, path); dfsRecurssion(graph, currentState, 0, maxDepth, path, minDepth); return allPaths; } private void dfsRecurssion(DirectedGraph<State> graph, State currentState, int depth, int maxDepth, List<State> path) { private void dfsRecurssion(DirectedGraph<State> graph, State currentState, int depth, int maxDepth, List<State> path, int minDepth) { if ((currentState == State.DOT)) { ListToString lTS = ListToString.of(); StringBuilder sb = new StringBuilder(); Loading @@ -34,7 +34,7 @@ public class DFS { lTS.addString(p); cnt++; } if(cnt>(maxDepth-4)) if(cnt>(maxDepth-minDepth)) allPaths.add(lTS.getString()); return; }else if(depth >= maxDepth){ Loading @@ -43,7 +43,7 @@ public class DFS { List<State> transitions = graph.getAdjacentNodes(currentState); for (State nextState : transitions) { path.add(nextState); dfsRecurssion(graph, nextState, depth + 1, maxDepth, path); dfsRecurssion(graph, nextState, depth + 1, maxDepth, path, minDepth); path.remove(path.size() - 1); } } Loading CheckerCorrector/SQLite/mydatabase.db (12 KiB) File changed.No diff preview for this file type. View original file View changed file Loading
CheckerCorrector/Checker.java +19 −1 Original line number Diff line number Diff line Loading @@ -21,7 +21,25 @@ public class Checker { DBinterface dbInterface = new DBinterface(); DirectedGraph<State> graph = basicGraphClass.getGraph(); JsonMaker jsonMaker = JsonMaker.create(); if(argPars.isUpdateHashTable()){ if(argPars.isUpdateToken()){ if(argPars.isCheckFile()){ SentenceExtractor extractor = SentenceExtractor.of(argPars.getFileName()); List<String> extractedSentences = extractor.getSentences(); for (String sentence : extractedSentences) { dbInterface.updateTokenInDatabase(sentence.toLowerCase(), graph); PhraseExtractor extractorPhrase = PhraseExtractor.fromSentence(sentence, 3, 5); List<String> phrases = extractorPhrase.getPhrases(); for (String phrase : phrases) { dbInterface.updateTokenInDatabase(phrase.toLowerCase(), graph); } } }else if(argPars.isCheckSentence()){ dbInterface.updateTokenInDatabase(argPars.getSentence().toLowerCase(), graph); for (String phrase : PhraseExtractor.fromSentence(argPars.getSentence(),3, 5).getPhrases()) { //dbInterface.updateTokenInDatabase(phrase.toLowerCase(), graph); } } }else if(argPars.isUpdateHashTable()){ if(argPars.isCheckFile()){ SentenceExtractor extractor = SentenceExtractor.of(argPars.getFileName()); List<String> extractedSentences = extractor.getSentences(); Loading
CheckerCorrector/DBinterface/DBinterface.java +117 −0 Original line number Diff line number Diff line Loading @@ -215,4 +215,121 @@ public class DBinterface { } return new String(); } public void updateTokenInDatabase(String sentence, DirectedGraph<State> graph){ StateMachine SM = new StateMachine(); sentence = sentence.replaceAll("\\p{Punct}", " $0"); String[] tokens = sentence.split("\\s+"); String[] tokensCopy = tokens.clone(); List<Boolean> missFlag = new ArrayList(); List<String> tokenList = new ArrayList<>(Arrays.asList(tokensCopy)); String url = "jdbc:sqlite:./SQLite/mydatabase.db"; String urlinsert = "jdbc:sqlite:./SQLite/newdatabase.db"; String dicFileName = "./SQLite/smallDic.txt"; TypoCorrector typoChecker = TypoCorrector.of(dicFileName); int initialConf = 0; int cntMiss = 0; try (Connection connection = DriverManager.getConnection(url)) { // Lookup each token in the database and categorize it for (int i = 0; i < tokens.length; i++) { String token = tokens[i]; System.out.print("\nBefor token: "+tokens[i]+"| "); try (Statement statement = connection.createStatement()) { String query = "SELECT role FROM word_roles WHERE word = '" + token + "';"; String role = new String(); ResultSet resultSet = statement.executeQuery(query); if (resultSet.next()) { role = resultSet.getString("role"); //////System.out.print("first try: " + token + " -> " + role); System.out.println("role: " + role); if(!role.equals("NAN")){ tokens[i] = role; missFlag.add(false); }else{ missFlag.add(true); cntMiss ++; } }else{ String tokenCorrected = new String(); if(role.isEmpty()){ tokenCorrected = typoChecker.closestWord(token); if(!tokenCorrected.equals(token)) initialConf += 5; // ////System.out.print("Corrected token: " + token + " -> " + tokenCorrected); query = "SELECT role FROM word_roles WHERE word = '" + tokenCorrected + "';"; // Replace the token with its role resultSet = statement.executeQuery(query); if (resultSet.next()) { tokenList.set(i,tokenCorrected); role = 
resultSet.getString("role"); // ////System.out.print("| Second try: "+ token + " -> " + role); System.out.println("role: " + role); if(!role.equals("NAN")){ tokens[i] = role; missFlag.add(false); }else{ missFlag.add(true); cntMiss ++; } }else{ missFlag.add(true); cntMiss ++; } } } } System.out.print("After token: "+tokens[i]+"| "); //////System.out.println(); } System.out.println("\nMISS: "+cntMiss); if(cntMiss<3 && cntMiss>0){ List<State> actions = new ArrayList<>(); for(String token: tokens){ actions.add(State.fromString(token)); } // Define the initial state State initialState = State.START; // Check if the sequence of actions follows the state machine List<State> suggested = SM.updateDB(graph, actions, initialState); System.out.println(actions); System.out.println(suggested); System.out.println("---------------------------"); int cntUp=0; for(int i=0; i<suggested.size(); i++){ if(!suggested.get(i).toString().equals(tokens[i])) cntUp++; } System.out.println(missFlag); if(cntUp<3){ for(int i=0; i<tokens.length; i++){ if(missFlag.get(i)){ if(!suggested.get(i).toString().equals(tokens[i])){ System.out.println(tokens[i] + "| " + suggested.get(i)); String sql = "INSERT INTO word_roles VALUES(?, ?);"; try (Connection conn = DriverManager.getConnection(urlinsert); PreparedStatement pstmt = conn.prepareStatement(sql)) { // Insert the first record pstmt.setString(1, tokens[i]); pstmt.setString(2, suggested.get(i).toString()); pstmt.executeUpdate(); } catch (SQLException e) { System.out.println(e.getMessage()); } } } } } } } catch (SQLException e) { e.printStackTrace(); } } }
CheckerCorrector/DirectedGraph/BasicGraph.java +2 −0 Original line number Diff line number Diff line Loading @@ -53,6 +53,7 @@ public class BasicGraph { graph.addEdge(State.NOT, State.ADVERB); graph.addEdge(State.NOT, State.ADJECTIVE); graph.addEdge(State.NOT, State.ARTICLE); graph.addEdge(State.NOT, State.DOT); graph.addEdge(State.OF, State.NOUN); graph.addEdge(State.NOUN, State.OF); graph.addEdge(State.NOUN, State.IS); Loading @@ -60,6 +61,7 @@ public class BasicGraph { graph.addEdge(State.IS, State.ADJECTIVE); graph.addEdge(State.IS, State.ADVERB); graph.addEdge(State.IS, State.ARTICLE); graph.addEdge(State.IS, State.NOT); graph.addEdge(State.THAT, State.IF); Loading
CheckerCorrector/DirectedGraph/DFS.java +5 −5 Original line number Diff line number Diff line Loading @@ -19,13 +19,13 @@ public class DFS { return new DFS(); } public Set<String> dfs(DirectedGraph<State> graph, State currentState, int maxDepth){ public Set<String> dfs(DirectedGraph<State> graph, State currentState, int maxDepth, int minDepth){ List<State> path = new ArrayList<>(); path.add(currentState); dfsRecurssion(graph, currentState, 0, maxDepth, path); dfsRecurssion(graph, currentState, 0, maxDepth, path, minDepth); return allPaths; } private void dfsRecurssion(DirectedGraph<State> graph, State currentState, int depth, int maxDepth, List<State> path) { private void dfsRecurssion(DirectedGraph<State> graph, State currentState, int depth, int maxDepth, List<State> path, int minDepth) { if ((currentState == State.DOT)) { ListToString lTS = ListToString.of(); StringBuilder sb = new StringBuilder(); Loading @@ -34,7 +34,7 @@ public class DFS { lTS.addString(p); cnt++; } if(cnt>(maxDepth-4)) if(cnt>(maxDepth-minDepth)) allPaths.add(lTS.getString()); return; }else if(depth >= maxDepth){ Loading @@ -43,7 +43,7 @@ public class DFS { List<State> transitions = graph.getAdjacentNodes(currentState); for (State nextState : transitions) { path.add(nextState); dfsRecurssion(graph, nextState, depth + 1, maxDepth, path); dfsRecurssion(graph, nextState, depth + 1, maxDepth, path, minDepth); path.remove(path.size() - 1); } } Loading
CheckerCorrector/SQLite/mydatabase.db (12 KiB) File changed. No diff preview for this file type. View original file View changed file