Commit 41f82c4e authored by Seyed Reza  Sajjadinasab's avatar Seyed Reza Sajjadinasab
Browse files

Merge branch 'Reza' into 'master'

fixBugSimilarity

See merge request ec504/ec504_projects/group6!56
parents d1a5e6e6 8578c703
Loading
Loading
Loading
Loading
+36 −24
Original line number Diff line number Diff line
@@ -15,6 +15,7 @@ import java.sql.*;
import StateMachine.*;
import TypoCorrector.FilePrefixComparator;
import TypoCorrector.TypoCorrector;
import TypoCorrector.WordCountSorter;
import util.TwoListStruct;
import util.StringFileWriter;
import util.StringProcessor;
@@ -236,15 +237,17 @@ public class DBinterface {
                            if (resultSet.next()) {
                                word = resultSet.getString("word");
                                ////System.out.println("Here I am: "+ word);
                                if(i+biasToken-1>0){
                                if(i+biasToken>0){
                                    String nextToken = word;
                                    String queryy = "SELECT role FROM word_roles WHERE word = '" + nextToken + "';";
                                    ResultSet resultSett = statement.executeQuery(queryy);
                                    if (resultSett.next()) {
                                        String roleNext = resultSett.getString("role");
                                        wordPairDatabase.bfsAndGetWords(tokenList.get(i+biasToken-1), 2);
                                        FilePrefixComparator findSimilarity =  FilePrefixComparator.of("similarity_words.txt");
                                        String tokenCorrected = findSimilarity.findBestMatchingPrefix(nextToken);
                                        wordPairDatabase.bfsAndWriteCountsToFile(tokenList.get(i+biasToken-1), 2);
                                        WordCountSorter wordCountSorter = WordCountSorter.of("similarity_words.txt");
                                        List<String> possibleWords = wordCountSorter.getSortedWordsByCount();
                                        //System.out.println(possibleWords);
                                        for(String tokenCorrected: possibleWords){
                                            if(!nextToken.equals(tokenCorrected)){
                                                String queryNext = "SELECT role FROM word_roles WHERE word = '" + tokenCorrected + "';";
                                                ResultSet resultSetNext = statement.executeQuery(queryNext);
@@ -252,6 +255,8 @@ public class DBinterface {
                                                    String roleNextNext = resultSett.getString("role");
                                                    if(roleNextNext.equals(roleNext)){
                                                        word = tokenCorrected;
                                                        break;
                                                    }
                                                }
                                            }
                                        }
@@ -272,7 +277,7 @@ public class DBinterface {
                                    
                                }else{
                                    if(flagsCorrection.isEmpty()){
                                        sfw.appendString("(INSERTION INTO INDEX): "+ i + " -> "+ word + "*");
                                        sfw.appendString("(INSERTION INTO THE END) -> "+ word + "*");
                                        tokenList.add(word);
                                    }else if(flagsCorrection.get(flagsCorrectioncnt)){
                                        tokenList.add(word);
@@ -306,15 +311,17 @@ public class DBinterface {
                            if (resultSet.next()) {
                                word = resultSet.getString("word");
                                ////System.out.println("Here I am: "+ word);
                                if(i+biasToken-1>0){
                                if(i+biasToken>0){
                                    String nextToken = word;
                                    String queryy = "SELECT role FROM word_roles WHERE word = '" + nextToken + "';";
                                    ResultSet resultSett = statement.executeQuery(queryy);
                                    if (resultSett.next()) {
                                        String roleNext = resultSett.getString("role");
                                        wordPairDatabase.bfsAndGetWords(tokenList.get(i+biasToken-1), 2);
                                        FilePrefixComparator findSimilarity =  FilePrefixComparator.of("similarity_words.txt");
                                        String tokenCorrected = findSimilarity.findBestMatchingPrefix(nextToken);
                                        wordPairDatabase.bfsAndWriteCountsToFile(tokenList.get(i+biasToken-1), 2);
                                        WordCountSorter wordCountSorter = WordCountSorter.of("similarity_words.txt");
                                        List<String> possibleWords = wordCountSorter.getSortedWordsByCount();
                                       // System.out.println(possibleWords);
                                        for(String tokenCorrected: possibleWords){
                                            if(!nextToken.equals(tokenCorrected)){
                                                String queryNext = "SELECT role FROM word_roles WHERE word = '" + tokenCorrected + "';";
                                                ResultSet resultSetNext = statement.executeQuery(queryNext);
@@ -322,16 +329,20 @@ public class DBinterface {
                                                    String roleNextNext = resultSett.getString("role");
                                                    if(roleNextNext.equals(roleNext)){
                                                        word = tokenCorrected;
                                                        break;
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                                if(flagsCorrection.isEmpty()){
                                    sfw.appendString("(INSERTION INTO THE END) -> "+ word + "*");
                                    
                                    if(i<tokenList.size()){
                                        sfw.appendString("(INSERTION INTO INDEX): "+ i + " -> "+ word + "*");
                                        tokenList.add(i+biasToken,word);
                                    }else{
                                        sfw.appendString("(INSERTION INTO THE END) -> "+ word + "*");
                                        tokenList.add(word);
                                    }
                                }else if(flagsCorrection.get(flagsCorrectioncnt)){
@@ -339,6 +350,7 @@ public class DBinterface {
                                    // System.out.println("here!");
                                        tokenList.add(i+biasToken,word);
                                    }else{
                                        //sfw.appendString("(INSERTION INTO THE END) -> "+ word + "*");
                                        tokenList.add(word);
                                    }
                                }else{
+53 −0
Original line number Diff line number Diff line
@@ -41,6 +41,7 @@ public class WordPairDatabase {
                while ((line = reader.readLine()) != null) {
                    line = line.toLowerCase();
                    line = StringProcessor.handleApostrophe(line);
                    line = line.replaceAll("\\p{Punct}", " $0");
                    String[] words = line.split("\\s+");
                    for (int i = 0; i < words.length - 1; i++) {
                        String word1 = words[i];
@@ -133,6 +134,58 @@ public class WordPairDatabase {
        return nextWords;
    }

    /**
     * Walks the word-pair graph breadth-first from {@code startWord} and writes every
     * successor word found, together with a pair count, to {@code similarity_words.txt}.
     *
     * <p>Each level expands the words discovered by the previous level through
     * {@link #getCountsForWord(Connection, String)}. A word is expanded at most once, so
     * self-pairs and cycles in the pair table do not trigger repeated queries or unbounded
     * queue growth. When a word is reachable from several predecessors, the count kept is
     * the one from the predecessor expanded last (plain overwrite).
     *
     * <p>The output file is rewritten on every call, one {@code "<word> <count>"} entry per
     * line. If the database cannot be read, the error is reported on {@code System.err} and
     * whatever was collected so far (possibly nothing) is still written.
     *
     * @param startWord word whose successors seed the traversal
     * @param depth     number of BFS levels to expand; values {@code <= 0} produce an empty file
     */
    public void bfsAndWriteCountsToFile(String startWord, int depth) {
        String outputFile = "similarity_words.txt";
        Map<String, Integer> wordCounts = new HashMap<>();
        // Words whose successors have already been fetched. Fully qualified so the block
        // does not depend on imports that are not visible from here.
        java.util.Set<String> expanded = new java.util.HashSet<>();
        Queue<String> queue = new ArrayDeque<>();
        queue.add(startWord);

        try (Connection conn = DriverManager.getConnection(url)) {
            for (int level = 0; level < depth && !queue.isEmpty(); level++) {
                // Snapshot the size so only the current level is drained in this pass.
                int levelSize = queue.size();
                for (int i = 0; i < levelSize; i++) {
                    String word = queue.poll();
                    if (!expanded.add(word)) {
                        continue; // already expanded via another path
                    }
                    for (Map.Entry<String, Integer> entry : getCountsForWord(conn, word).entrySet()) {
                        String nextWord = entry.getKey();
                        wordCounts.put(nextWord, entry.getValue());
                        // Re-expanding a word yields the same rows, so only queue new ones.
                        if (!expanded.contains(nextWord)) {
                            queue.add(nextWord);
                        }
                    }
                }
            }
        } catch (SQLException e) {
            System.err.println("Error accessing database: " + e.getMessage());
        }

        // Write word counts to the output file
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile))) {
            for (Map.Entry<String, Integer> entry : wordCounts.entrySet()) {
                writer.write(entry.getKey() + " " + entry.getValue() + "\n");
            }
        } catch (IOException e) {
            System.err.println("Error writing to file: " + e.getMessage());
        }
    }

    /**
     * Fetches every successor recorded for {@code word} in the {@code word_pairs} table.
     *
     * @param conn open connection used to run the query; it is not closed here
     * @param word value matched against the {@code word1} column
     * @return map from each {@code word2} value to its {@code count}; empty when no row
     *         matches, and possibly partial when a {@link SQLException} interrupts the read
     *         (the error is reported on {@code System.err} rather than propagated)
     */
    private Map<String, Integer> getCountsForWord(Connection conn, String word) {
        Map<String, Integer> counts = new HashMap<>();
        String sql = "SELECT word2, count FROM word_pairs WHERE word1 = ?";

        try (PreparedStatement stmt = conn.prepareStatement(sql)) {
            stmt.setString(1, word);
            // The result set gets its own resource block so it is released explicitly
            // instead of relying on the statement's close to do it.
            try (ResultSet rs = stmt.executeQuery()) {
                while (rs.next()) {
                    counts.put(rs.getString("word2"), rs.getInt("count"));
                }
            }
        } catch (SQLException e) {
            System.err.println("Error retrieving counts for word: " + e.getMessage());
        }

        return counts;
    }

    public static void main(String[] args) {
        String url = "word_pairs.db";
        String fileName = "sentences.txt";
+47 −0
Original line number Diff line number Diff line
package TypoCorrector;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;

/**
 * Reads a whitespace-separated {@code "<word> <count>"} file and ranks the words by count.
 *
 * <p>Instances are created through {@link #of(String)}; the file is read again on every
 * call to {@link #getSortedWordsByCount()}.
 */
public class WordCountSorter {
    private final String filename;

    private WordCountSorter(String filename) {
        this.filename = filename;
    }

    /**
     * Creates a sorter bound to the given file.
     *
     * @param filename path of the word/count file to read
     * @return a new sorter for that file
     */
    public static WordCountSorter of(String filename){
        return new WordCountSorter(filename);
    }

    /**
     * Loads the file and returns its words ordered by count, highest first.
     *
     * <p>Only lines made of exactly two whitespace-separated tokens, the second being a
     * valid integer, are used; surrounding whitespace is ignored and any other line is
     * skipped. If a word appears on several lines, the last line wins. Words with equal
     * counts are ordered alphabetically so the result does not depend on hash-map
     * iteration order.
     *
     * @return the words sorted by descending count; empty if the file cannot be opened, or
     *         limited to the lines read before an I/O error (reported on {@code System.err})
     */
    public List<String> getSortedWordsByCount() {
        Map<String, Integer> wordCountMap = new HashMap<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(filename))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Trim first: a leading blank would otherwise yield an empty first token.
                String[] parts = line.trim().split("\\s+");
                if (parts.length != 2) {
                    continue;
                }
                try {
                    wordCountMap.put(parts[0], Integer.parseInt(parts[1]));
                } catch (NumberFormatException ignored) {
                    // A non-numeric count is treated like any other malformed line: skipped.
                }
            }
        } catch (IOException e) {
            System.err.println("Error reading file: " + e.getMessage());
        }

        // Sort by count descending, then by word so ties have a stable order.
        List<Map.Entry<String, Integer>> sortedEntries = new ArrayList<>(wordCountMap.entrySet());
        sortedEntries.sort((e1, e2) -> {
            int byCount = Integer.compare(e2.getValue(), e1.getValue());
            return byCount != 0 ? byCount : e1.getKey().compareTo(e2.getKey());
        });

        // Extract sorted words from sorted entries
        List<String> sortedWords = new ArrayList<>(sortedEntries.size());
        for (Map.Entry<String, Integer> entry : sortedEntries) {
            sortedWords.add(entry.getKey());
        }

        return sortedWords;
    }
}