Loading src/main/java/edu/bu/LanguageCorrection/TrieNode.java +42 −25 Original line number Diff line number Diff line Loading @@ -2,6 +2,7 @@ package edu.bu.LanguageCorrection; import java.io.Serializable; import java.util.HashMap; import java.util.Stack; public class TrieNode implements Serializable { HashMap<String, TrieNode> children = new HashMap<>(); Loading Loading @@ -54,44 +55,60 @@ public class TrieNode implements Serializable { } public byte[] serialize() { return serializeHelper(this).getBytes(); return serializeHelper(this).replaceAll(" \\}", "\\}").trim().getBytes(); } private String serializeHelper(TrieNode node) { StringBuilder serialized = new StringBuilder(); serialized.append(node.count).append(" ").append(node.childCounts).append(" "); serialized.append(node.count).append(" ").append(node.childCounts); if (node.childCounts == 0) { serialized.append(" "); // System.out.println("Serialized node: " + node.count + " " + node.childCounts); return serialized.toString(); } else { serialized.append("{"); } for (String key : node.children.keySet()) { System.out.println(" key: " + key); // System.out.println(" key: " + key); serialized.append(key).append(" ").append(serializeHelper(node.children.get(key))); } System.out.println("Serialized node: " + node.count + " " + node.childCounts); serialized.append("}"); // System.out.println("Serialized node: " + node.count + " " + node.childCounts); return serialized.toString(); } public TrieNode deserialize(byte[] serialized) { String serializedString = new String(serialized); String[] tokens = serializedString.split(" "); System.out.println("Deserialized metadata size: " + tokens.length + " tokens"); // Create Root TrieNode root = new TrieNode(); root.count = Integer.parseInt(tokens[0]); root.childCounts = Integer.parseInt(tokens[1]); for (int i = 2; i < tokens.length; i += 2) { root.children.put(tokens[i], deserializeHelper(tokens, i + 1)); } return root; public void deserialize(byte[] data) { String serializedString = new String(data); // Ensure proper spacing around '{' and '}' to correctly parse children serializedString = serializedString.replaceAll("\\{", " { ").replaceAll("\\}", " } ").trim(); String[] parts = serializedString.split("\\s+"); int[] index = { 0 }; // Clear current state before rebuilding this.children.clear(); deserializeHelper(this, parts, index); } // Helper method adapted for in-place deserialization private static void deserializeHelper(TrieNode node, String[] parts, int[] index) { if (index[0] >= parts.length) { throw new IllegalArgumentException("Unexpected end of serialized data."); } private TrieNode deserializeHelper(String[] tokens, int index) { // Create Trie Node TrieNode node = new TrieNode(); node.count = Integer.parseInt(tokens[index]); node.childCounts = Integer.parseInt(tokens[index + 1]); System.out.println("Index "+index+" Deserialized node: " + node.count + " " + node.childCounts+" token "+tokens[index+2]); for (int i = index + 2; i < tokens.length; i += 2) { node.children.put(tokens[i], deserializeHelper(tokens, i + 1)); } return node; // Set the current node's count and childCounts node.count = Integer.parseInt(parts[index[0]++]); node.childCounts = Integer.parseInt(parts[index[0]++]); if ("{".equals(parts[index[0]])) { index[0]++; // Move past the "{" marker while (!"}".equals(parts[index[0]])) { String key = parts[index[0]++]; TrieNode child = new TrieNode(); deserializeHelper(child, parts, index); node.children.put(key, child); } index[0]++; // Skip the "}" marker } } } src/main/java/edu/bu/LanguageCorrection/crawler.java +3 −2 Original line number Diff line number Diff line Loading @@ -113,6 +113,7 @@ public class crawler { System.out.println(web_data.text()); */ // Extract word usage data //System.err.println(web_data.text()); extractWordUsage(web_data.text(), wordUsage); byte[] metadata = wordUsage.serialize(); System.out.println("Metadata extracted successfully."); Loading Loading @@ -140,7 +141,7 @@ public class crawler { // Split text into sentences String[] sentences = text.split("[.!?] "); for (String sentence : sentences) { for (int nGram = 1; nGram <= 2; nGram++) { for (int nGram = 1; nGram <= 3; nGram++) { String[] words = sentence.split("\\s+"); for (int i = 0; i < words.length - nGram + 1; i++) { trie.insert(Arrays.copyOfRange(words, i, i + nGram)); Loading target/classes/edu/bu/LanguageCorrection/TrieNode.class −404 B (4.55 KiB) File changed.No diff preview for this file type. View original file View changed file target/classes/edu/bu/LanguageCorrection/crawler.class −43 B (8.57 KiB) File changed.No diff preview for this file type. View original file View changed file Loading
src/main/java/edu/bu/LanguageCorrection/TrieNode.java +42 −25 Original line number Diff line number Diff line Loading @@ -2,6 +2,7 @@ package edu.bu.LanguageCorrection; import java.io.Serializable; import java.util.HashMap; import java.util.Stack; public class TrieNode implements Serializable { HashMap<String, TrieNode> children = new HashMap<>(); Loading Loading @@ -54,44 +55,60 @@ public class TrieNode implements Serializable { } public byte[] serialize() { return serializeHelper(this).getBytes(); return serializeHelper(this).replaceAll(" \\}", "\\}").trim().getBytes(); } private String serializeHelper(TrieNode node) { StringBuilder serialized = new StringBuilder(); serialized.append(node.count).append(" ").append(node.childCounts).append(" "); serialized.append(node.count).append(" ").append(node.childCounts); if (node.childCounts == 0) { serialized.append(" "); // System.out.println("Serialized node: " + node.count + " " + node.childCounts); return serialized.toString(); } else { serialized.append("{"); } for (String key : node.children.keySet()) { System.out.println(" key: " + key); // System.out.println(" key: " + key); serialized.append(key).append(" ").append(serializeHelper(node.children.get(key))); } System.out.println("Serialized node: " + node.count + " " + node.childCounts); serialized.append("}"); // System.out.println("Serialized node: " + node.count + " " + node.childCounts); return serialized.toString(); } public TrieNode deserialize(byte[] serialized) { String serializedString = new String(serialized); String[] tokens = serializedString.split(" "); System.out.println("Deserialized metadata size: " + tokens.length + " tokens"); // Create Root TrieNode root = new TrieNode(); root.count = Integer.parseInt(tokens[0]); root.childCounts = Integer.parseInt(tokens[1]); for (int i = 2; i < tokens.length; i += 2) { root.children.put(tokens[i], deserializeHelper(tokens, i + 1)); } return root; public void deserialize(byte[] data) { String serializedString = new String(data); // Ensure proper spacing around '{' and '}' to correctly parse children serializedString = serializedString.replaceAll("\\{", " { ").replaceAll("\\}", " } ").trim(); String[] parts = serializedString.split("\\s+"); int[] index = { 0 }; // Clear current state before rebuilding this.children.clear(); deserializeHelper(this, parts, index); } // Helper method adapted for in-place deserialization private static void deserializeHelper(TrieNode node, String[] parts, int[] index) { if (index[0] >= parts.length) { throw new IllegalArgumentException("Unexpected end of serialized data."); } private TrieNode deserializeHelper(String[] tokens, int index) { // Create Trie Node TrieNode node = new TrieNode(); node.count = Integer.parseInt(tokens[index]); node.childCounts = Integer.parseInt(tokens[index + 1]); System.out.println("Index "+index+" Deserialized node: " + node.count + " " + node.childCounts+" token "+tokens[index+2]); for (int i = index + 2; i < tokens.length; i += 2) { node.children.put(tokens[i], deserializeHelper(tokens, i + 1)); } return node; // Set the current node's count and childCounts node.count = Integer.parseInt(parts[index[0]++]); node.childCounts = Integer.parseInt(parts[index[0]++]); if ("{".equals(parts[index[0]])) { index[0]++; // Move past the "{" marker while (!"}".equals(parts[index[0]])) { String key = parts[index[0]++]; TrieNode child = new TrieNode(); deserializeHelper(child, parts, index); node.children.put(key, child); } index[0]++; // Skip the "}" marker } } }
src/main/java/edu/bu/LanguageCorrection/crawler.java +3 −2 Original line number Diff line number Diff line Loading @@ -113,6 +113,7 @@ public class crawler { System.out.println(web_data.text()); */ // Extract word usage data //System.err.println(web_data.text()); extractWordUsage(web_data.text(), wordUsage); byte[] metadata = wordUsage.serialize(); System.out.println("Metadata extracted successfully."); Loading Loading @@ -140,7 +141,7 @@ public class crawler { // Split text into sentences String[] sentences = text.split("[.!?] "); for (String sentence : sentences) { for (int nGram = 1; nGram <= 2; nGram++) { for (int nGram = 1; nGram <= 3; nGram++) { String[] words = sentence.split("\\s+"); for (int i = 0; i < words.length - nGram + 1; i++) { trie.insert(Arrays.copyOfRange(words, i, i + nGram)); Loading
target/classes/edu/bu/LanguageCorrection/TrieNode.class −404 B (4.55 KiB) File changed.No diff preview for this file type. View original file View changed file
target/classes/edu/bu/LanguageCorrection/crawler.class −43 B (8.57 KiB) File changed.No diff preview for this file type. View original file View changed file