Loading Crawler/src/main/java/org/example/TrieNode.java 0 → 100644 +54 −0 Original line number Diff line number Diff line package org.example; import java.util.HashMap; public class TrieNode { HashMap<String, TrieNode> children = new HashMap<>(); int count = 0; int childCounts = 0; public void insert(String[] phrase) { TrieNode current = this; TrieNode past = this; // Store the previous node for (String word : phrase) { past = current; current = current.children.computeIfAbsent(word, c -> new TrieNode()); } current.count += 1; past.childCounts += 1; } public float probability(String phrase) { TrieNode current = this; TrieNode past = this; for (String word : phrase.split(" ")) { past = current; current = current.children.get(word); if (current == null) { // System.out.println("Phrase not found in trie."); return 0; } } // System.out.println("Probability of phrase: " + (double) current.count / // past.childCounts); return (float) current.count / past.childCounts; } public float perplexity(String phrase) { TrieNode current = this; TrieNode past = this; float logProb = 0; for (String word : phrase.split(" ")) { past = current; current = current.children.get(word); if (current == null) { // System.out.println("Phrase not found in trie."); return Float.MAX_VALUE; } logProb += Math.log((float) current.count / past.childCounts); } float perplexity = (float) Math.pow(2, -logProb); // System.out.println("Perplexity of phrase: " + perplexity); return perplexity; } } Crawler/src/main/java/org/example/crawler.java +2 −51 Original line number Diff line number Diff line Loading @@ -7,7 +7,6 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.Map; Loading @@ -21,57 +20,9 @@ import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.Arrays; public class crawler { static class TrieNode { HashMap<String, 
TrieNode> children = new HashMap<>(); int count = 0; int childCounts = 0; public void insert(String[] phrase) { TrieNode current = this; TrieNode past = this; // Store the previous node for (String word : phrase) { past = current; current = current.children.computeIfAbsent(word, c -> new TrieNode()); } current.count += 1; past.childCounts += 1; } public float probability(String phrase) { TrieNode current = this; TrieNode past = this; for (String word : phrase.split(" ")) { past = current; current = current.children.get(word); if (current == null) { // System.out.println("Phrase not found in trie."); return 0; } } // System.out.println("Probability of phrase: " + (double) current.count / past.childCounts); return (float) current.count / past.childCounts; } import org.example.TrieNode; public float perplexity(String phrase) { TrieNode current = this; TrieNode past = this; float logProb = 0; for (String word : phrase.split(" ")) { past = current; current = current.children.get(word); if (current == null) { // System.out.println("Phrase not found in trie."); return Float.MAX_VALUE; } logProb += Math.log((float) current.count / past.childCounts); } float perplexity = (float) Math.pow(2, -logProb); // System.out.println("Perplexity of phrase: " + perplexity); return perplexity; } } public class crawler { public static void main(String[] args) throws IOException { // Initialize web crawler Loading Loading
// Source file: Crawler/src/main/java/org/example/TrieNode.java (declared in package org.example)

import java.util.HashMap;

/**
 * Node of a word-level trie that counts inserted phrases.
 *
 * <p>Each node keeps the number of phrases that end exactly at it ({@link #count}) and the
 * number of insertions whose final word is one of its direct children ({@link #childCounts}).
 * The ratio {@code count / parent.childCounts} is therefore the relative frequency of a phrase
 * among all inserted phrases that share its prefix and its length.
 */
public class TrieNode {
    /** Child nodes keyed by the next word of a phrase. */
    HashMap<String, TrieNode> children = new HashMap<>();

    /** Number of inserted phrases that end exactly at this node. */
    int count = 0;

    /** Number of insertions whose last word is a direct child of this node. */
    int childCounts = 0;

    /**
     * Records one occurrence of {@code phrase}, creating any missing nodes along its path.
     *
     * <p>For an empty array both counters are incremented on this node itself, because the
     * "last node" and its "parent" are then the same object.
     *
     * @param phrase the words of the phrase, in order
     */
    public void insert(String[] phrase) {
        TrieNode current = this;
        TrieNode past = this; // parent of the node the phrase ends at
        for (String word : phrase) {
            past = current;
            current = current.children.computeIfAbsent(word, w -> new TrieNode());
        }
        current.count += 1;
        past.childCounts += 1;
    }

    /**
     * Returns the relative frequency of {@code phrase} among the inserted phrases that share
     * its prefix and length.
     *
     * @param phrase words separated by single spaces
     * @return {@code count / parent.childCounts} for the node the phrase ends at, or {@code 0}
     *     when the path does not exist or no phrase was ever inserted at that depth under the
     *     same prefix
     */
    public float probability(String phrase) {
        TrieNode current = this;
        TrieNode past = this;
        for (String word : phrase.split(" ")) {
            past = current;
            current = current.children.get(word);
            if (current == null) {
                return 0;
            }
        }
        // The path can exist purely as a prefix of longer phrases; the parent then has
        // childCounts == 0 and the division below would be 0/0 = NaN.
        if (past.childCounts == 0) {
            return 0;
        }
        return (float) current.count / past.childCounts;
    }

    /**
     * Returns a perplexity-style score for {@code phrase}: {@code 2^(-sum)} where {@code sum}
     * accumulates {@code Math.log(count / parent.childCounts)} for every node on the path.
     *
     * <p>NOTE(review): the sum uses the natural logarithm while the exponent base is 2, and the
     * sum is not divided by the number of words. Both are kept as-is so existing scores do not
     * change; confirm whether {@code Math.exp} and length normalisation were intended.
     *
     * @param phrase words separated by single spaces
     * @return the score, or {@link Float#MAX_VALUE} when any step of the path is missing or has
     *     no recorded occurrences
     */
    public float perplexity(String phrase) {
        TrieNode current = this;
        TrieNode past = this;
        float logProb = 0;
        for (String word : phrase.split(" ")) {
            past = current;
            current = current.children.get(word);
            // A step with zero occurrences would feed log(0) or log(0/0) into the sum and
            // yield Infinity/NaN; report it with the same sentinel as an unseen phrase.
            if (current == null || current.count == 0 || past.childCounts == 0) {
                return Float.MAX_VALUE;
            }
            logProb += Math.log((float) current.count / past.childCounts);
        }
        return (float) Math.pow(2, -logProb);
    }
}
Crawler/src/main/java/org/example/crawler.java +2 −51 Original line number Diff line number Diff line Loading @@ -7,7 +7,6 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.Map; Loading @@ -21,57 +20,9 @@ import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.Arrays; public class crawler { static class TrieNode { HashMap<String, TrieNode> children = new HashMap<>(); int count = 0; int childCounts = 0; public void insert(String[] phrase) { TrieNode current = this; TrieNode past = this; // Store the previous node for (String word : phrase) { past = current; current = current.children.computeIfAbsent(word, c -> new TrieNode()); } current.count += 1; past.childCounts += 1; } public float probability(String phrase) { TrieNode current = this; TrieNode past = this; for (String word : phrase.split(" ")) { past = current; current = current.children.get(word); if (current == null) { // System.out.println("Phrase not found in trie."); return 0; } } // System.out.println("Probability of phrase: " + (double) current.count / past.childCounts); return (float) current.count / past.childCounts; } import org.example.TrieNode; public float perplexity(String phrase) { TrieNode current = this; TrieNode past = this; float logProb = 0; for (String word : phrase.split(" ")) { past = current; current = current.children.get(word); if (current == null) { // System.out.println("Phrase not found in trie."); return Float.MAX_VALUE; } logProb += Math.log((float) current.count / past.childCounts); } float perplexity = (float) Math.pow(2, -logProb); // System.out.println("Perplexity of phrase: " + perplexity); return perplexity; } } public class crawler { public static void main(String[] args) throws IOException { // Initialize web crawler Loading