Commit 7ea368c0 authored by Seyed Reza  Sajjadinasab's avatar Seyed Reza Sajjadinasab
Browse files

addTypoCorrector

parent f3363693
Loading
Loading
Loading
Loading
+29 −4
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@ import DirectedGraph.DirectedGraph;

import java.sql.*;
import StateMachine.*;
import TypoCorrector.TypoCorrector;

public class DBinterface {
    public void checkTokenInDatabase(String sentence, DirectedGraph<State> graph){
@@ -14,6 +15,9 @@ public class DBinterface {
        sentence = sentence.replaceAll("\\p{Punct}", " $0");
        String[] tokens = sentence.split("\\s+");
        String url = "jdbc:sqlite:./SQLite/mydatabase.db";
        String dicFileName = "./SQLite/smallDic.txt";
        TypoCorrector typoChecker =  TypoCorrector.of(dicFileName);
        int initialConf = 0;
        try (Connection connection = DriverManager.getConnection(url)) {

            // Lookup each token in the database and categorize it
@@ -21,14 +25,35 @@ public class DBinterface {
                String token = tokens[i];
                
                try (Statement statement = connection.createStatement()) {
                    
                    String query = "SELECT role FROM word_roles WHERE word = '" + token + "';";
                    String role = new String();
                    
                    ResultSet resultSet = statement.executeQuery(query);
                    if (resultSet.next()) {
                        String role = resultSet.getString("role");
                        role = resultSet.getString("role");
                        System.out.print("first try: " + token + " -> " + role);
                        tokens[i] = role;
                    }else{
                        String tokenCorrected = new String();
                        if(role.isEmpty()){
                            tokenCorrected = typoChecker.closestWord(token);
                            if(!tokenCorrected.equals(token))
                                initialConf += 5;
                            System.out.print("Corrected token: " + token + " -> " + tokenCorrected);
                            query = "SELECT role FROM word_roles WHERE word = '" + tokenCorrected + "';";
                            // Replace the token with its role
                            resultSet = statement.executeQuery(query);
                            if (resultSet.next()) {
                                role = resultSet.getString("role");
                                System.out.print("| Second try: "+ token + " -> " + role);
                                tokens[i] = role;
                            }
                        }

                    } 
                    }
                    System.out.println();
            }

            List<State> actions = new ArrayList<>();
@@ -41,7 +66,7 @@ public class DBinterface {

            // Check if the sequence of actions follows the state machine

            int confidence = SM.isStateMachineFollowed(graph, actions, initialState);
            int confidence = SM.isStateMachineFollowed(graph, actions, initialState, initialConf);
            System.out.print("The confidence score is: "+ confidence + "\n");
        } catch (SQLException e) {
            e.printStackTrace();
(12 KiB)

File changed.

No diff preview for this file type.

SQLite/smallDic.txt

0 → 100644
+210687 −0

File added.

Preview size limit exceeded, changes collapsed.

+2 −2
Original line number Diff line number Diff line
@@ -4,8 +4,8 @@ import java.util.List;

import DirectedGraph.DirectedGraph;
public class StateMachine{
    public int isStateMachineFollowed(DirectedGraph<State> graph, List<State> actions, State initialState) {
        int confidence = 0;
    public int isStateMachineFollowed(DirectedGraph<State> graph, List<State> actions, State initialState, int initialConf) {
        int confidence = initialConf;
        State currentState = initialState;
        for (State action : actions) {
            List<State> transitions = graph.getAdjacentNodes(currentState);
+96 −0
Original line number Diff line number Diff line
package TypoCorrector;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;

/**
 * Dictionary-backed typo corrector.
 *
 * <p>Loads a word list from a text file and, for a given token, returns the dictionary word
 * whose alignment score against the token is highest. The score is computed by a
 * dynamic-programming alignment over the full length of both strings, using the
 * match / mismatch / gap-open / gap-extend constants declared at the bottom of the class.
 */
public class TypoCorrector {
    // Back-pointer codes stored in the direction matrix of the alignment.
    private static final int DIR_NONE = 0;      // boundary cell, never written by the DP
    private static final int DIR_DIAG = 1;      // came from [i-1][j-1]: characters paired
    private static final int DIR_WORD_GAP = 2;  // came from [i][j-1]: token char left unpaired
    private static final int DIR_DIC_GAP = 3;   // came from [i-1][j]: dictionary char left unpaired

    /**
     * Creates a corrector and loads its dictionary from {@code filePath}.
     *
     * @param filePath path of the word-list file
     */
    TypoCorrector(String filePath){
        dic = new ArrayList<>();
        readWordsFromFile(filePath);
    }

    /**
     * Static factory equivalent to the constructor.
     *
     * @param filename path of the word-list file
     * @return a corrector whose dictionary was loaded from {@code filename}
     */
    public static TypoCorrector of (String filename){
        return new TypoCorrector(filename);
    }

    /**
     * Appends every word found in the file to the dictionary.
     *
     * <p>Each line is stripped of everything except ASCII letters and spaces, lower-cased,
     * and split on whitespace; every non-empty piece becomes one dictionary entry.
     * An {@link IOException} is reported on stderr and otherwise ignored, so a missing file
     * leaves the dictionary with whatever it already held (deliberately best-effort).
     *
     * @param filePath path of the word-list file
     */
    public void readWordsFromFile(String filePath) {
        try (BufferedReader br = new BufferedReader(new FileReader(filePath))) {
            String line;
            while ((line = br.readLine()) != null) {
                // Keep only letters and spaces so punctuation/digits never reach the dictionary.
                line = line.replaceAll("[^a-zA-Z ]", "").toLowerCase();
                String[] words = line.split("\\s+");
                for (String word : words) {
                    if (!word.isEmpty()) {
                        dic.add(word);
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the dictionary word that best aligns with {@code word}.
     *
     * <p>Tokens that are {@code null} or contain no ASCII letter (empty strings, punctuation,
     * numbers, ...) are returned unchanged: the dictionary only ever holds a-z words, so such a
     * token cannot be a misspelling of any entry. When several entries share the best score the
     * one that appears first in the dictionary wins. If the dictionary is empty the token is
     * returned unchanged.
     *
     * @param word token to correct; may be {@code null}
     * @return the best-scoring dictionary word, or {@code word} itself when no correction applies
     */
    public String closestWord(String word) {
        if (word == null || !hasAsciiLetter(word)) {
            return word;
        }

        int bestScore = -(1<<30);
        String bestWord = "";
        for (String candidate : dic) {
            int score = alignmentScore(candidate, word);
            // Strict comparison keeps the earliest entry on ties.
            if (score > bestScore) {
                bestWord = candidate;
                bestScore = score;
            }
        }

        // NOTE(review): scores are never positive (matchScore is 0 and every penalty is
        // negative), so "bestScore < 4" always holds and the closest word is returned no
        // matter how poor the alignment is. A minimum-score cutoff may have been intended;
        // kept as-is to preserve behavior - confirm with the author.
        return (bestScore < 4 && bestWord.length() > 0) ? bestWord : word;
    }

    /** Tells whether {@code s} contains at least one character in a-z or A-Z. */
    private static boolean hasAsciiLetter(String s) {
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
                return true;
            }
        }
        return false;
    }

    /**
     * Computes the alignment score of {@code candidate} (rows) against {@code word} (columns).
     *
     * <p>A gap step costs the "extend" penalty when the cell it leaves was itself reached by the
     * same kind of gap or is a boundary cell, and the "open" penalty otherwise.
     */
    private int alignmentScore(String candidate, String word) {
        int m = candidate.length();
        int n = word.length();
        int[][] scoreMat = new int[m+1][n+1];
        int[][] dirMat   = new int[m+1][n+1];

        // Boundary: aligning a prefix against nothing is one opened gap plus extensions.
        for (int j = 1; j <= n; j++) {
            scoreMat[0][j] = (j-1)*gapExtend + gapOpen;
        }
        for (int i = 1; i <= m; i++) {
            scoreMat[i][0] = (i-1)*insertGapExtend + insertGapOpen;
        }

        for (int i = 1; i <= m; i++) {
            for (int j = 1; j <= n; j++) {
                int prevWordDir = dirMat[i][j-1];
                int prevDicDir  = dirMat[i-1][j];

                int wordGap = scoreMat[i][j-1]
                        + ((prevWordDir == DIR_WORD_GAP || prevWordDir == DIR_NONE) ? gapExtend : gapOpen);
                int dicGap = scoreMat[i-1][j]
                        + ((prevDicDir == DIR_DIC_GAP || prevDicDir == DIR_NONE) ? insertGapExtend : insertGapOpen);
                int diag = scoreMat[i-1][j-1]
                        + (word.charAt(j-1) == candidate.charAt(i-1) ? matchScore : mismatchScore);

                // Preference on ties: diagonal, then token gap, then dictionary gap.
                int best = diag;
                int dir = DIR_DIAG;
                if (wordGap > best) {
                    best = wordGap;
                    dir = DIR_WORD_GAP;
                }
                if (dicGap > best) {
                    best = dicGap;
                    dir = DIR_DIC_GAP;
                }
                scoreMat[i][j] = best;
                dirMat[i][j] = dir;
            }
        }
        return scoreMat[m][n];
    }

    // Dictionary entries, in file order (lower-case a-z words only).
    ArrayList<String> dic;
    // Alignment scoring constants. All scores are <= 0; a perfect match scores 0.
    final int mismatchScore  = -2;
    final int matchScore = 0;
    final int gapOpen   = -2;
    final int gapExtend  = -1;
    final int insert  = -1;          // not referenced inside this class
    final int insertGapOpen  = -2;
    final int insertGapExtend  = -1;
    final int minusInf = -(1<<4);    // not referenced inside this class
}
Loading