Commit b83aedd6 authored by Seyed Reza  Sajjadinasab's avatar Seyed Reza Sajjadinasab
Browse files

Merge branch 'Reza' into 'master'

Reza

See merge request ec504/ec504_projects/group6!1
parents 294e74be 597c7c76
Loading
Loading
Loading
Loading

.gitignore

0 → 100644
+7 −0
Original line number Diff line number Diff line
bin
*.json
*.jar
test.*
*.txt
*db
!manifest*
 No newline at end of file
+108 −0
Original line number Diff line number Diff line
import java.io.IOException;
import java.security.NoSuchAlgorithmException;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.List;

import HashTableMaker.HashTableMaker;
import DirectedGraph.BasicGraph;
import DirectedGraph.DirectedGraph;
import HashTableMaker.HashTableMaker;
import StateMachine.*;
import DBinterface.DBinterface;
import util.*;

/**
 * Command-line entry point that either feeds text into the n-gram hash table
 * or scores text and writes the scores to {@code data.json}.
 *
 * <p>The mode is selected by the flags exposed by {@link ArgumentParser}:
 * <ul>
 *   <li>update-hash-table + file: every sentence of the file (and its 1..4 word
 *       phrases) is stored, lower-cased, through {@code HashTableMaker};</li>
 *   <li>update-hash-table + sentence: the same for a single sentence;</li>
 *   <li>file only: every sentence and phrase is scored with a blend of the
 *       n-gram score and the state-machine score;</li>
 *   <li>sentence only: the sentence and its phrases are scored with the
 *       state-machine score alone.</li>
 * </ul>
 */
public class Checker {
    /** Shortest phrase (in words) stored when updating the hash table. */
    private static final int MIN_PHRASE_WORDS = 1;
    /** Longest phrase (in words) stored when updating the hash table. */
    private static final int MAX_PHRASE_WORDS = 4;
    /** N-gram size passed to {@code HashTableMaker.nGram}. */
    private static final int NGRAM_SIZE = 3;
    /** File the collected scores are written to. */
    private static final String JSON_OUTPUT = "data.json";

    public static void main(String[] args) {
        ArgumentParser argPars = ArgumentParser.of(args);
        BasicGraph basicGraphClass = new BasicGraph();
        DBinterface dbInterface = new DBinterface();
        DirectedGraph<State> graph = basicGraphClass.getGraph();
        JsonMaker jsonMaker = JsonMaker.create();

        if (argPars.isUpdateHashTable()) {
            if (argPars.isCheckFile()) {
                updateHashTable(SentenceExtractor.of(argPars.getFileName()).getSentences());
            } else if (argPars.isCheckSentence()) {
                updateHashTable(java.util.Collections.singletonList(argPars.getSentence()));
            }
        } else if (argPars.isCheckFile()) {
            checkSentencesFromFile(
                    SentenceExtractor.of(argPars.getFileName()).getSentences(),
                    dbInterface, graph, jsonMaker);
        } else if (argPars.isCheckSentence()) {
            checkSingleSentence(argPars.getSentence(), dbInterface, graph, jsonMaker);
        }
    }

    /**
     * Stores each sentence and all of its phrases (lower-cased) in the hash table.
     * The connection is released in a {@code finally} block so that it is closed
     * even when one of the updates fails.
     */
    private static void updateHashTable(List<String> sentences) {
        try {
            HashTableMaker manager = new HashTableMaker();
            try {
                for (String sentence : sentences) {
                    manager.updateDatabase(sentence.toLowerCase());
                    List<String> phrases = PhraseExtractor
                            .fromSentence(sentence, MIN_PHRASE_WORDS, MAX_PHRASE_WORDS)
                            .getPhrases();
                    for (String phrase : phrases) {
                        manager.updateDatabase(phrase.toLowerCase());
                    }
                }
            } finally {
                manager.closeConnection();
            }
        } catch (SQLException | NoSuchAlgorithmException e) {
            e.printStackTrace();
        }
    }

    /**
     * Scores every sentence and each of its phrases with the blended
     * n-gram / state-machine score and rewrites {@code data.json} after each
     * sentence has been processed.
     */
    private static void checkSentencesFromFile(List<String> sentences, DBinterface dbInterface,
                                               DirectedGraph<State> graph, JsonMaker jsonMaker) {
        try {
            HashTableMaker manager = new HashTableMaker();
            try {
                for (String sentence : sentences) {
                    System.out.println("Sentence: " + sentence);
                    System.out.println("*********************************************************");

                    List<String> phrases = PhraseExtractor.fromSentence(sentence).getPhrases();
                    // The hash table is populated with lower-cased text, so the
                    // lookup has to be lower-cased as well (as it is for phrases).
                    String loweredSentence = sentence.toLowerCase();
                    int ngram = manager.nGram(loweredSentence, NGRAM_SIZE);
                    int stateMachine = dbInterface.checkTokenInDatabase(loweredSentence, graph);
                    jsonMaker.addSentence(loweredSentence, blend(ngram, stateMachine));

                    for (String phrase : phrases) {
                        System.out.println("Phrase: " + phrase);
                        String loweredPhrase = phrase.toLowerCase();
                        ngram = manager.nGram(loweredPhrase, NGRAM_SIZE);
                        stateMachine = dbInterface.checkTokenInDatabase(loweredPhrase, graph);
                        jsonMaker.addPhrase(loweredPhrase, blend(ngram, stateMachine));
                        System.out.println("------------------------------------------------------------");
                    }

                    jsonMaker.toJson(JSON_OUTPUT);
                    System.out.println("##########################################################");
                }
            } finally {
                // Previously this connection was never closed in this mode.
                manager.closeConnection();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Scores one sentence and its phrases using only the state-machine score
     * and writes the result to {@code data.json}.
     */
    private static void checkSingleSentence(String sentence, DBinterface dbInterface,
                                            DirectedGraph<State> graph, JsonMaker jsonMaker) {
        System.out.println("Sentence: " + sentence);
        String lowered = sentence.toLowerCase();
        jsonMaker.addSentence(lowered, dbInterface.checkTokenInDatabase(lowered, graph));
        System.out.println("*********************************************************");

        for (String phrase : PhraseExtractor.fromSentence(lowered).getPhrases()) {
            System.out.println("Phrase: " + phrase);
            jsonMaker.addPhrase(phrase, dbInterface.checkTokenInDatabase(phrase.toLowerCase(), graph));
            System.out.println("------------------------------------------------------------");
        }
        jsonMaker.toJson(JSON_OUTPUT);
        System.out.println("##########################################################");
    }

    /**
     * Combines the two scores as 20% n-gram + 80% state machine. A negative
     * n-gram value is treated as "no n-gram score" and the state-machine score
     * is returned unchanged.
     */
    private static int blend(int ngram, int stateMachine) {
        return (ngram >= 0) ? (int) (ngram * 0.2 + stateMachine * 0.8) : stateMachine;
    }
}

//javac -d bin Checker.java **/*.java
//java -cp bin:SQLite/sqlite-jdbc-3.45.2.0.jar:SQLite/slf4j-api-1.7.36.jar:SQLite/slf4j-jdk14-1.7.36.jar Checker 
//jar cvfm checker.jar manifest.txt -C bin . -C SQLite .
+57 −0
Original line number Diff line number Diff line
import java.io.IOException;
import java.util.List;

import DBinterface.DBinterface;
import DirectedGraph.BasicGraph;
import DirectedGraph.DirectedGraph;
import util.ArgumentParser;
import util.JsonMaker;
import util.PhraseExtractor;
import util.SentenceExtractor;
import util.StringFileWriter;
import StateMachine.*;


/**
 * Command-line entry point that runs each input sentence through
 * {@code DBinterface.correctTokenInDatabase} and appends the result to
 * {@code corrected.txt}.
 */
public class Corrector {
    public static void main(String[] args) {
        ArgumentParser cli = ArgumentParser.of(args);
        BasicGraph graphFactory = new BasicGraph();
        DBinterface database = new DBinterface();
        DirectedGraph<State> grammar = graphFactory.getGraph();
        StringFileWriter output = StringFileWriter.of("corrected.txt");

        if (cli.isCheckFile()) {
            // File mode: correct every sentence extracted from the file.
            List<String> sentences = SentenceExtractor.of(cli.getFileName()).getSentences();
            for (String sentence : sentences) {
                correctAndWrite(sentence, database, grammar, output);
            }
            return;
        }

        if (cli.isCheckSentence()) {
            // Sentence mode: correct the single sentence given on the command line.
            correctAndWrite(cli.getSentence(), database, grammar, output);
        }
    }

    /**
     * Echoes the sentence, appends its corrected (lower-cased input) form to the
     * writer and flushes the writer to disk, reporting success or failure on the
     * console.
     */
    private static void correctAndWrite(String sentence, DBinterface database,
                                        DirectedGraph<State> grammar, StringFileWriter output) {
        System.out.println("Sentence: " + sentence);
        String corrected = database.correctTokenInDatabase(sentence.toLowerCase(), grammar);
        output.appendString(corrected);

        try {
            output.writeToFile();
            System.out.println("Corrected version has been written to the file.");
        } catch (IOException e) {
            System.err.println("An error occurred while writing to the file: " + e.getMessage());
        }
        System.out.println("##########################################################");
    }
}
+218 −0
Original line number Diff line number Diff line
package DBinterface;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import DirectedGraph.DirectedGraph;

import java.sql.*;
import StateMachine.*;
import TypoCorrector.TypoCorrector;
import util.TwoListStruct;


/**
 * Bridges sentences to the {@code word_roles} SQLite table and the grammar
 * state machine: tokens are mapped to their stored roles (with one
 * typo-correction attempt for unknown tokens) and the resulting role sequence
 * is scored or corrected against the supplied graph.
 *
 * <p>All queries use bound parameters. Tokens come straight from user text and
 * the tokenizer isolates punctuation, so a plain apostrophe would otherwise
 * produce a malformed (or injectable) SQL statement.
 */
public class DBinterface {
    private static final String DB_URL = "jdbc:sqlite:./SQLite/mydatabase.db";
    private static final String DICTIONARY_FILE = "./SQLite/smallDic.txt";
    private static final String ROLE_BY_WORD_SQL = "SELECT role FROM word_roles WHERE word = ?";
    private static final String WORD_BY_ROLE_SQL = "SELECT word FROM word_roles WHERE role = ?";

    /** Amount added to the initial score for every token the typo corrector changed. */
    private static final int TYPO_PENALTY = 5;
    /** Maximum number of correction passes applied to one sentence. */
    private static final int MAX_CORRECTION_PASSES = 2;
    /** Correction stops early once the score of the sentence is below this value. */
    private static final int ACCEPTABLE_SCORE = 10;

    /** Change flags returned by {@code StateMachine.suggestedStateMachine}, as used below. */
    private static final int FLAG_REPLACE = 1;
    private static final int FLAG_DELETE = 2;
    private static final int FLAG_INSERT = 3;

    /**
     * Scores a sentence against the grammar graph.
     *
     * @param sentence text to score; punctuation is split off into separate tokens
     * @param graph    grammar graph handed to the state machine
     * @return the value returned by {@code StateMachine.isStateMachineFollowed},
     *         seeded with 5 for every typo-corrected token, or {@code 0} when a
     *         database error occurs
     */
    public int checkTokenInDatabase(String sentence, DirectedGraph<State> graph){
        StateMachine SM = new StateMachine();
        String[] tokens = tokenize(sentence);
        TypoCorrector typoChecker = TypoCorrector.of(DICTIONARY_FILE);
        try (Connection connection = DriverManager.getConnection(DB_URL)) {
            int initialConf = resolveRoles(connection, typoChecker, tokens, null);
            return SM.isStateMachineFollowed(graph, toStates(tokens), State.START, initialConf);
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return 0;
    }

    /**
     * Applies up to two correction passes to the sentence, stopping as soon as
     * the corrected sentence scores below 10 in {@link #checkTokenInDatabase}.
     *
     * @param sentence text to correct
     * @param graph    grammar graph handed to the state machine
     * @return the corrected sentence (empty when the correction pass hit a
     *         database error)
     */
    public String correctTokenInDatabase(String sentence, DirectedGraph<State> graph){
        for (int pass = 0; pass < MAX_CORRECTION_PASSES; pass++) {
            sentence = correctTokenInDatabaseInnerloop(sentence, graph);
            if (checkTokenInDatabase(sentence, graph) < ACCEPTABLE_SCORE) {
                break;
            }
        }
        return sentence;
    }

    /**
     * Runs one correction pass: fixes typos, asks the state machine for a
     * suggested role sequence and rewrites the token list accordingly.
     */
    private String correctTokenInDatabaseInnerloop(String sentence, DirectedGraph<State> graph){
        StateMachine SM = new StateMachine();
        String[] tokens = tokenize(sentence);
        // tokens is overwritten with roles below; tokenList keeps the actual words.
        List<String> tokenList = new ArrayList<>(Arrays.asList(tokens));
        TypoCorrector typoChecker = TypoCorrector.of(DICTIONARY_FILE);
        try (Connection connection = DriverManager.getConnection(DB_URL)) {
            resolveRoles(connection, typoChecker, tokens, tokenList);

            TwoListStruct<State, Integer> output =
                    SM.suggestedStateMachine(graph, toStates(tokens), State.START);
            List<State> suggested = output.getOutputList();
            List<Integer> flags = output.getChangesList();
            // Upper bound on the number of tokens emitted; kept from the original
            // implementation, where it was a placeholder for "stop after the first dot".
            int maxTokens = Math.max(suggested.size() + 1, flags.size() + 1);

            applySuggestions(connection, suggested, flags, tokenList);
            return joinTokens(tokenList, maxTokens);
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return "";
    }

    /** Splits the sentence on whitespace after putting a space in front of every punctuation character. */
    private static String[] tokenize(String sentence) {
        return sentence.replaceAll("\\p{Punct}", " $0").split("\\s+");
    }

    /** Converts role names to states via {@code State.fromString}. */
    private static List<State> toStates(String[] roles) {
        List<State> actions = new ArrayList<>();
        for (String role : roles) {
            actions.add(State.fromString(role));
        }
        return actions;
    }

    /**
     * Replaces every token in {@code tokens} by its role from the database.
     * Tokens that are not found are passed through the typo corrector once and
     * looked up again; tokens that still cannot be resolved are left unchanged.
     *
     * @param correctedWords when non-null, receives the typo-corrected word at the
     *                       token's index whenever the corrected word was found
     * @return 5 for every token whose typo-corrected form differs from the token
     */
    private static int resolveRoles(Connection connection, TypoCorrector typoChecker,
                                    String[] tokens, List<String> correctedWords) throws SQLException {
        int penalty = 0;
        try (PreparedStatement roleLookup = connection.prepareStatement(ROLE_BY_WORD_SQL)) {
            for (int i = 0; i < tokens.length; i++) {
                String token = tokens[i];
                if (assignRole(roleLookup, token, tokens, i)) {
                    continue;
                }
                String tokenCorrected = typoChecker.closestWord(token);
                if (!tokenCorrected.equals(token)) {
                    penalty += TYPO_PENALTY;
                }
                if (assignRole(roleLookup, tokenCorrected, tokens, i) && correctedWords != null) {
                    correctedWords.set(i, tokenCorrected);
                }
            }
        }
        return penalty;
    }

    /**
     * Looks up the role of {@code word}; when a row exists, stores the role in
     * {@code tokens[index]}.
     *
     * @return {@code true} when the word was found
     */
    private static boolean assignRole(PreparedStatement roleLookup, String word,
                                      String[] tokens, int index) throws SQLException {
        roleLookup.setString(1, word);
        try (ResultSet rows = roleLookup.executeQuery()) {
            if (!rows.next()) {
                return false;
            }
            tokens[index] = rows.getString("role");
            return true;
        }
    }

    /**
     * Rewrites {@code tokenList} according to the state machine's change flags:
     * 1 replaces the word at the position, 2 deletes it, 3 inserts a word.
     * Replacement words are the first word stored for the suggested role.
     */
    private static void applySuggestions(Connection connection, List<State> suggested,
                                         List<Integer> flags, List<String> tokenList) throws SQLException {
        int delCnt = 0;
        try (PreparedStatement wordLookup = connection.prepareStatement(WORD_BY_ROLE_SQL)) {
            for (int i = 0; i < suggested.size(); i++) {
                // NOTE(review): the flag index is shifted by the number of deletions so far;
                // this mirrors the original indexing and presumably matches how
                // suggestedStateMachine lays out its two lists - confirm against StateMachine.
                int flag = flags.get(i + delCnt);
                if (flag == FLAG_REPLACE) {
                    String word = firstWordForRole(wordLookup, suggested.get(i));
                    if (word != null) {
                        if (i < tokenList.size()) {
                            tokenList.set(i, word);
                        } else {
                            tokenList.add(word);
                        }
                    }
                } else if (flag == FLAG_DELETE) {
                    delCnt++;
                    tokenList.remove(i);
                } else if (flag == FLAG_INSERT) {
                    String word = firstWordForRole(wordLookup, suggested.get(i));
                    if (word != null) {
                        if (i < tokenList.size()) {
                            tokenList.add(i, word);
                        } else {
                            tokenList.add(word);
                        }
                    }
                }
            }
        }
    }

    /**
     * Returns the first word stored for the given role, or {@code null} when
     * there is none (a SQL NULL word is treated the same way so that it cannot
     * end up in the token list).
     */
    private static String firstWordForRole(PreparedStatement wordLookup, State role) throws SQLException {
        wordLookup.setString(1, String.valueOf(role));
        try (ResultSet rows = wordLookup.executeQuery()) {
            return rows.next() ? rows.getString("word") : null;
        }
    }

    /**
     * Joins at most {@code maxTokens} tokens with single spaces, omitting the
     * space in front of "." and ",".
     */
    private static String joinTokens(List<String> tokenList, int maxTokens) {
        StringBuilder result = new StringBuilder();
        int emitted = 0;
        for (String token : tokenList) {
            if (emitted == maxTokens) {
                break;
            }
            if (emitted > 0 && !token.equals(".") && !token.equals(",")) {
                result.append(" ");
            }
            result.append(token);
            emitted++;
        }
        return result.toString();
    }
}
+70 −0
Original line number Diff line number Diff line
package DirectedGraph;
import StateMachine.State;

public class BasicGraph {
    public DirectedGraph<State> graph;
    public BasicGraph(){
        graph = new DirectedGraph<>();
        makeBasicGraph();
    }
    public void makeBasicGraph(){
        State cur = State.first();
        for(int i=0; i<State.values().length; i++){
            graph.addNode(cur);
            cur = cur.next();
        }

        cur = State.first();
        for(int i=1; i<State.values().length; i++){
            cur = cur.next();
            graph.addEdge(State.first(), cur);
        }

        graph.addEdge(State.START,     State.PRONOUN);
        graph.addEdge(State.PRONOUN,   State.VERB);
        graph.addEdge(State.VERB,      State.ADVERB);
        graph.addEdge(State.ADVERB,    State.ADJECTIVE);
        graph.addEdge(State.VERB,      State.ARTICLE);
        graph.addEdge(State.ARTICLE,   State.ADVERB);
        graph.addEdge(State.ARTICLE,   State.ADJECTIVE);
        graph.addEdge(State.ARTICLE,   State.NOUN);
        graph.addEdge(State.ADJECTIVE, State.DOT);
        graph.addEdge(State.ADJECTIVE, State.NOUN);
        graph.addEdge(State.ADJECTIVE, State.COMMA);
        graph.addEdge(State.NOUN,      State.DOT);
        graph.addEdge(State.NOUN,      State.COMMA);
        graph.addEdge(State.DOT,       State.END);
        //graph.addEdge(State.COMMA,     State.PRONOUN);
        graph.addEdge(State.COMMA,     State.CONJ);
        graph.addEdge(State.CONJ,      State.PRONOUN);
        graph.addEdge(State.PRONOUN,   State.CAN);
        graph.addEdge(State.CAN,       State.VERB);
        graph.addEdge(State.IF,        State.PRONOUN);
        graph.addEdge(State.THAT,      State.PRONOUN);
        graph.addEdge(State.IF,        State.NOUN);
        graph.addEdge(State.THAT,      State.NOUN);
        graph.addEdge(State.NOUN,      State.VERB);
        graph.addEdge(State.NOUN,      State.CAN);
        //graph.addEdge(State.VERB,      State.NOT);
        graph.addEdge(State.PRONOUN,   State.DOES);
        graph.addEdge(State.NOUN,      State.DOES);
        graph.addEdge(State.DOES,      State.NOT);
        graph.addEdge(State.NOT,       State.VERB);
        graph.addEdge(State.NOT,       State.ADVERB);
        graph.addEdge(State.NOT,       State.ADJECTIVE);
        graph.addEdge(State.NOT,       State.ARTICLE);
        graph.addEdge(State.OF,        State.NOUN);
        graph.addEdge(State.NOUN,      State.OF);
        graph.addEdge(State.NOUN,      State.IS);
        graph.addEdge(State.PRONOUN,   State.IS);
        graph.addEdge(State.IS,        State.ADJECTIVE);
        graph.addEdge(State.IS,        State.ADVERB);
        graph.addEdge(State.IS,        State.ARTICLE);
        graph.addEdge(State.THAT,      State.IF);


    }
    public DirectedGraph<State> getGraph() {
        return graph;
    }
}
Loading