Commit 82a7c7cc authored by Seyed Reza  Sajjadinasab's avatar Seyed Reza Sajjadinasab
Browse files

updateTokenUsingCrawledData

parent 7d777840
Loading
Loading
Loading
Loading
+19 −1
Original line number Diff line number Diff line
@@ -21,7 +21,25 @@ public class Checker {
        DBinterface dbInterface = new DBinterface();
        DirectedGraph<State> graph = basicGraphClass.getGraph();
        JsonMaker jsonMaker = JsonMaker.create();
        if(argPars.isUpdateHashTable()){
        if(argPars.isUpdateToken()){
            if(argPars.isCheckFile()){
                SentenceExtractor extractor = SentenceExtractor.of(argPars.getFileName());
                List<String> extractedSentences = extractor.getSentences();  
                for (String sentence : extractedSentences) {
                    dbInterface.updateTokenInDatabase(sentence.toLowerCase(), graph);
                    PhraseExtractor extractorPhrase = PhraseExtractor.fromSentence(sentence, 3, 5);
                    List<String> phrases = extractorPhrase.getPhrases();
                    for (String phrase : phrases) {
                        dbInterface.updateTokenInDatabase(phrase.toLowerCase(), graph);
                    }                        
                }
            }else if(argPars.isCheckSentence()){
                dbInterface.updateTokenInDatabase(argPars.getSentence().toLowerCase(), graph);
                for (String phrase : PhraseExtractor.fromSentence(argPars.getSentence(),3, 5).getPhrases()) {
                    //dbInterface.updateTokenInDatabase(phrase.toLowerCase(), graph);                    
                }
            }
        }else if(argPars.isUpdateHashTable()){
            if(argPars.isCheckFile()){
                SentenceExtractor extractor = SentenceExtractor.of(argPars.getFileName());
                List<String> extractedSentences = extractor.getSentences();  
+117 −0
Original line number Diff line number Diff line
@@ -215,4 +215,121 @@ public class DBinterface {
        }
        return new String();
    }

    /**
     * Tries to learn roles for words of {@code sentence} that the role database does not know yet.
     *
     * <p>Steps:
     * <ol>
     *   <li>Every punctuation character is split off into its own token and the sentence is
     *       tokenized on whitespace.</li>
     *   <li>Each token is looked up in {@code word_roles} of {@code mydatabase.db}. When the token
     *       itself is absent, the closest dictionary word reported by {@link TypoCorrector} is
     *       looked up instead. A token whose role is found (and is not {@code "NAN"}) is replaced
     *       by that role; every other token is counted as a miss.</li>
     *   <li>When there are one or two misses, the token sequence is converted to {@link State}s
     *       and handed to {@code StateMachine.updateDB}. If fewer than three suggested states
     *       differ from the tokens, each missed token is inserted with its suggested state into
     *       {@code word_roles} of {@code newdatabase.db}.</li>
     * </ol>
     *
     * <p>Lookups use a parameterized statement, so tokens containing quote characters (which the
     * punctuation split produces for any apostrophe in the input) no longer break the query.
     * SQL errors are reported on the console and do not propagate to the caller.
     *
     * @param sentence the text to analyse; callers visible in this change pass it lower-cased
     * @param graph    the state graph forwarded unchanged to {@code StateMachine.updateDB}
     */
    public void updateTokenInDatabase(String sentence, DirectedGraph<State> graph){
        StateMachine stateMachine = new StateMachine();
        // Put a space in front of each punctuation character so it becomes a separate token.
        sentence = sentence.replaceAll("\\p{Punct}", " $0");
        String[] tokens = sentence.split("\\s+");
        // missFlag.get(i) is true when tokens[i] could not be resolved to a role.
        List<Boolean> missFlag = new ArrayList<>();
        String url       = "jdbc:sqlite:./SQLite/mydatabase.db";
        String urlinsert = "jdbc:sqlite:./SQLite/newdatabase.db";
        String dicFileName = "./SQLite/smallDic.txt";
        TypoCorrector typoChecker =  TypoCorrector.of(dicFileName);
        String lookupSql = "SELECT role FROM word_roles WHERE word = ?;";
        int cntMiss = 0;
        try (Connection connection = DriverManager.getConnection(url);
             PreparedStatement lookup = connection.prepareStatement(lookupSql)) {

            // Lookup each token in the database and categorize it
            for (int i = 0; i < tokens.length; i++) {
                String token = tokens[i];
                System.out.print("\nBefor token: "+tokens[i]+"| ");

                boolean found;
                String role = null;

                // First try: the token exactly as written.
                lookup.setString(1, token);
                try (ResultSet resultSet = lookup.executeQuery()) {
                    found = resultSet.next();
                    if (found) {
                        role = resultSet.getString("role");
                    }
                }

                // Second try: the closest dictionary word, in case the token is a typo.
                if (!found) {
                    String tokenCorrected = typoChecker.closestWord(token);
                    lookup.setString(1, tokenCorrected);
                    try (ResultSet resultSet = lookup.executeQuery()) {
                        found = resultSet.next();
                        if (found) {
                            role = resultSet.getString("role");
                        }
                    }
                }

                if (found) {
                    System.out.println("role: " + role);
                }

                // A row with role "NAN" (or a NULL role) is treated like an unknown word.
                boolean resolved = found && role != null && !role.equals("NAN");
                if (resolved) {
                    tokens[i] = role;
                    missFlag.add(false);
                } else {
                    missFlag.add(true);
                    cntMiss++;
                }
                System.out.print("After token: "+tokens[i]+"| ");
            }
            System.out.println("\nMISS: "+cntMiss);

            // Only attempt to learn when a small, non-zero number of tokens is unknown.
            if (cntMiss < 3 && cntMiss > 0) {
                List<State> actions = new ArrayList<>();
                for (String token : tokens) {
                    actions.add(State.fromString(token));
                }
                // Define the initial state
                State initialState = State.START;

                // Ask the state machine which states it would expect for this sequence.
                // NOTE(review): the loops below assume `suggested` has one entry per token —
                // confirm against StateMachine.updateDB.
                List<State> suggested = stateMachine.updateDB(graph, actions, initialState);
                System.out.println(actions);
                System.out.println(suggested);
                System.out.println("---------------------------");

                // Count positions where the suggestion disagrees with the current token/role.
                int cntUp = 0;
                for (int i = 0; i < suggested.size(); i++) {
                    if (!suggested.get(i).toString().equals(tokens[i])) {
                        cntUp++;
                    }
                }
                System.out.println(missFlag);

                if (cntUp < 3) {
                    String sql = "INSERT INTO word_roles VALUES(?, ?);";
                    for (int i = 0; i < tokens.length; i++) {
                        String suggestedRole = suggested.get(i).toString();
                        if (!missFlag.get(i) || suggestedRole.equals(tokens[i])) {
                            continue;
                        }
                        System.out.println(tokens[i] + "| " + suggested.get(i));
                        // The connection is opened per insert so that the target database is
                        // only touched when there is actually something to write.
                        try (Connection conn = DriverManager.getConnection(urlinsert);
                             PreparedStatement pstmt = conn.prepareStatement(sql)) {
                            pstmt.setString(1, tokens[i]);
                            pstmt.setString(2, suggestedRole);
                            pstmt.executeUpdate();
                        } catch (SQLException e) {
                            // Best effort: a failed insert must not stop the remaining ones.
                            System.out.println(e.getMessage());
                        }
                    }
                }
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }
}
+2 −0
Original line number Diff line number Diff line
@@ -53,6 +53,7 @@ public class BasicGraph {
        graph.addEdge(State.NOT,       State.ADVERB);
        graph.addEdge(State.NOT,       State.ADJECTIVE);
        graph.addEdge(State.NOT,       State.ARTICLE);
        graph.addEdge(State.NOT,       State.DOT);
        graph.addEdge(State.OF,        State.NOUN);
        graph.addEdge(State.NOUN,      State.OF);
        graph.addEdge(State.NOUN,      State.IS);
@@ -60,6 +61,7 @@ public class BasicGraph {
        graph.addEdge(State.IS,        State.ADJECTIVE);
        graph.addEdge(State.IS,        State.ADVERB);
        graph.addEdge(State.IS,        State.ARTICLE);
        graph.addEdge(State.IS,        State.NOT);
        graph.addEdge(State.THAT,      State.IF);


+5 −5
Original line number Diff line number Diff line
@@ -19,13 +19,13 @@ public class DFS {
        return new DFS();
    }

    public Set<String> dfs(DirectedGraph<State> graph, State currentState, int maxDepth){
    public Set<String> dfs(DirectedGraph<State> graph, State currentState, int maxDepth, int minDepth){
        List<State> path = new ArrayList<>();
        path.add(currentState);
        dfsRecurssion(graph, currentState, 0, maxDepth, path);
        dfsRecurssion(graph, currentState, 0, maxDepth, path, minDepth);
        return allPaths;
    }
    private void dfsRecurssion(DirectedGraph<State> graph, State currentState, int depth, int maxDepth, List<State> path) {
    private void dfsRecurssion(DirectedGraph<State> graph, State currentState, int depth, int maxDepth, List<State> path, int minDepth) {
        if ((currentState == State.DOT)) {
            ListToString lTS =  ListToString.of();
            StringBuilder sb = new StringBuilder();
@@ -34,7 +34,7 @@ public class DFS {
                lTS.addString(p);
                cnt++;
            }
            if(cnt>(maxDepth-4))
            if(cnt>(maxDepth-minDepth))
                allPaths.add(lTS.getString());
            return;
        }else if(depth >= maxDepth){
@@ -43,7 +43,7 @@ public class DFS {
        List<State> transitions = graph.getAdjacentNodes(currentState);
        for (State nextState : transitions) {
            path.add(nextState);
            dfsRecurssion(graph, nextState, depth + 1, maxDepth, path);
            dfsRecurssion(graph, nextState, depth + 1, maxDepth, path, minDepth);
            path.remove(path.size() - 1);
        }
    }
(12 KiB)

File changed.

No diff preview for this file type.

Loading