Commit fe408833 authored by Manuel  Segimon's avatar Manuel Segimon
Browse files

cleanup

parent 2c34f35c
Loading
Loading
Loading
Loading
+54 −0
Original line number Diff line number Diff line
package org.example;

import java.util.HashMap;

/**
 * Node of a word-level trie that counts inserted phrases.
 *
 * <p>Each edge is labelled with one word. A phrase is stored as the path of its
 * words starting from the node on which {@link #insert(String[])} is called.
 */
public class TrieNode {
    /** Child nodes, keyed by the next word of a phrase. */
    HashMap<String, TrieNode> children = new HashMap<>();
    /** Number of inserted phrases that end exactly at this node. */
    int count = 0;
    /** Number of inserted phrases whose last word is a direct child of this node. */
    int childCounts = 0;

    /**
     * Records one occurrence of {@code phrase} below this node.
     *
     * <p>Missing nodes along the path are created. The node of the last word gets
     * its {@code count} incremented and its parent gets {@code childCounts}
     * incremented. For an empty array both counters are incremented on this node.
     *
     * @param phrase the words of the phrase, in order
     */
    public void insert(String[] phrase) {
        TrieNode current = this;
        TrieNode past = this; // parent of `current` once the loop has run
        for (String word : phrase) {
            past = current;
            current = current.children.computeIfAbsent(word, key -> new TrieNode());
        }
        current.count += 1;
        past.childCounts += 1;
    }

    /**
     * Returns the relative frequency of the last word of {@code phrase} given the
     * words before it: the last node's {@code count} divided by its parent's
     * {@code childCounts}.
     *
     * @param phrase words separated by single spaces
     * @return the relative frequency, or {@code 0} when the path is not in the trie
     *         or no phrase ends directly below the parent node
     */
    public float probability(String phrase) {
        TrieNode current = this;
        TrieNode past = this;
        for (String word : phrase.split(" ")) {
            past = current;
            current = current.children.get(word);
            if (current == null) {
                return 0;
            }
        }
        return relativeFrequency(current.count, past.childCounts);
    }

    /**
     * Returns the inverse of the product of the per-word relative frequencies
     * along the path of {@code phrase}. The value is not normalised by the
     * number of words.
     *
     * @param phrase words separated by single spaces
     * @return the perplexity; {@link Float#MAX_VALUE} when a word of the path is
     *         not in the trie; {@link Float#POSITIVE_INFINITY} when a word on the
     *         path has relative frequency {@code 0}
     */
    public float perplexity(String phrase) {
        TrieNode current = this;
        TrieNode past = this;
        double logProb = 0;
        for (String word : phrase.split(" ")) {
            past = current;
            current = current.children.get(word);
            if (current == null) {
                return Float.MAX_VALUE;
            }
            // Natural log here, so the inverse below must be exp (not a power of 2).
            logProb += Math.log(relativeFrequency(current.count, past.childCounts));
        }
        return (float) Math.exp(-logProb);
    }

    /**
     * Divides {@code occurrences} by {@code total}, mapping a zero denominator to
     * {@code 0} instead of the NaN that 0/0 would produce in float arithmetic.
     */
    private static float relativeFrequency(int occurrences, int total) {
        if (total == 0) {
            return 0f;
        }
        return (float) occurrences / total;
    }
}
+2 −51
Original line number Diff line number Diff line
@@ -7,7 +7,6 @@ import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Map;
@@ -21,57 +20,9 @@ import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

public class crawler {

    static class TrieNode {
        HashMap<String, TrieNode> children = new HashMap<>();
        int count = 0;
        int childCounts = 0;

        public void insert(String[] phrase) {
            TrieNode current = this;
            TrieNode past = this; // Store the previous node
            for (String word : phrase) {
                past = current;
                current = current.children.computeIfAbsent(word, c -> new TrieNode());
            }
            current.count += 1;
            past.childCounts += 1;
        }

        public float probability(String phrase) {
            TrieNode current = this;
            TrieNode past = this;
            for (String word : phrase.split(" ")) {
                past = current;
                current = current.children.get(word);
                if (current == null) {
                    // System.out.println("Phrase not found in trie.");
                    return 0;
                }
            }
            // System.out.println("Probability of phrase: " + (double) current.count / past.childCounts);
            return (float) current.count / past.childCounts;
        }
import org.example.TrieNode;

        public float perplexity(String phrase) {
            TrieNode current = this;
            TrieNode past = this;
            float logProb = 0;
            for (String word : phrase.split(" ")) {
                past = current;
                current = current.children.get(word);
                if (current == null) {
                    // System.out.println("Phrase not found in trie.");
                    return Float.MAX_VALUE;
                }
                logProb += Math.log((float) current.count / past.childCounts);
            }
            float perplexity = (float) Math.pow(2, -logProb);
            // System.out.println("Perplexity of phrase: " + perplexity);
            return perplexity;
        }
    }
public class crawler {

    public static void main(String[] args) throws IOException {
        // Initialize web crawler