diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..26d33521af10bcc7fd8cea344038eaaeb78d0ef5
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/.idea/compiler.xml b/.idea/compiler.xml
new file mode 100644
index 0000000000000000000000000000000000000000..d2cebb0efc39e8710b68f40bc080e4f51a64b2f3
--- /dev/null
+++ b/.idea/compiler.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/encodings.xml b/.idea/encodings.xml
new file mode 100644
index 0000000000000000000000000000000000000000..b7aa75eb9a45f1f4e52d34c43f010b575a3857f4
--- /dev/null
+++ b/.idea/encodings.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/group7.iml b/.idea/group7.iml
new file mode 100644
index 0000000000000000000000000000000000000000..d6ebd4805981b8400db3e3291c74a743fef9a824
--- /dev/null
+++ b/.idea/group7.iml
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/jarRepositories.xml b/.idea/jarRepositories.xml
new file mode 100644
index 0000000000000000000000000000000000000000..712ab9d985c20018a0c97b93d2148ac1ffe588a5
--- /dev/null
+++ b/.idea/jarRepositories.xml
@@ -0,0 +1,20 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000000000000000000000000000000000000..905619eeeb1728d0017a45d8f572326c923146ef
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000000000000000000000000000000000000..5ce3c5e116c4f3c414e8445650743f9153a59318
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/uiDesigner.xml b/.idea/uiDesigner.xml
new file mode 100644
index 0000000000000000000000000000000000000000..2b63946d5b31084bbb7dda418ceb3d75eb686373
--- /dev/null
+++ b/.idea/uiDesigner.xml
@@ -0,0 +1,124 @@
+
+
+
+
+ -
+
+
+ -
+
+
+ -
+
+
+ -
+
+
+ -
+
+
+
+
+
+ -
+
+
+
+
+
+ -
+
+
+
+
+
+ -
+
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+
+
+ -
+
+
+ -
+
+
+ -
+
+
+ -
+
+
+ -
+
+
+
+
+ -
+
+
+ -
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000000000000000000000000000000000000..35eb1ddfbbc029bcab630581847471d7f238ec53
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Crawler/pom.xml b/Crawler/pom.xml
new file mode 100644
index 0000000000000000000000000000000000000000..e1d04f160b3c2b132dcce15c54e93fa6bfb3d9bc
--- /dev/null
+++ b/Crawler/pom.xml
@@ -0,0 +1,25 @@
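+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>org.example</groupId>
+    <artifactId>Crawler</artifactId>
+    <version>1.0-SNAPSHOT</version>
+
+    <properties>
+        <maven.compiler.source>18</maven.compiler.source>
+        <maven.compiler.target>18</maven.compiler.target>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.jsoup</groupId>
+            <artifactId>jsoup</artifactId>
+            <version>1.15.3</version>
+        </dependency>
+    </dependencies>
+
+</project>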
\ No newline at end of file
diff --git a/Crawler/src/main/java/org/example/crawler.java b/Crawler/src/main/java/org/example/crawler.java
new file mode 100644
index 0000000000000000000000000000000000000000..5233e82df44d80d3bb56447ec8838f2640ac2693
--- /dev/null
+++ b/Crawler/src/main/java/org/example/crawler.java
@@ -0,0 +1,60 @@
+package org.example;
+
+import org.jsoup.*;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+
+import java.io.File;
+/**
+ * Skeleton of a web crawler.
+ *
+ * <p>URLs are placed on a FIFO frontier with {@link #add_to_queue(String)};
+ * {@link #crawl(int)} drains the frontier and records every URL it takes off in a
+ * visited set. Fetching and parsing pages is not implemented yet (see the TODO in
+ * {@code crawl}).
+ */
+public class crawler {
+
+    // Frontier of URLs still waiting to be processed, in insertion order.
+    private final LinkedList<String> url_queue;
+    // URLs that have already been taken off the frontier (duplicates collapse).
+    private final HashSet<String> visited_urls;
+
+    /**
+     * Reads seed URLs, one per line, from {@code crawler_test_file.txt} in the working
+     * directory, crawls them and prints the visited set.
+     *
+     * @param args ignored
+     * @throws IOException if the seed file cannot be opened or read
+     */
+    public static void main(String[] args) throws IOException {
+        // Print the working directory so a missing seed file is easy to diagnose.
+        System.out.println(new File(".").getAbsolutePath());
+
+        crawler web_crawler = new crawler();
+        // try-with-resources closes the reader even when readLine() throws.
+        try (BufferedReader buf_read =
+                new BufferedReader(new FileReader("crawler_test_file.txt"))) {
+            String url_line;
+            while ((url_line = buf_read.readLine()) != null) {
+                // Blank lines are not URLs; skip them instead of enqueueing "".
+                if (!url_line.isBlank()) {
+                    web_crawler.add_to_queue(url_line.strip());
+                }
+            }
+        }
+        web_crawler.crawl(0);
+        web_crawler.get_visited();
+    }
+
+    /** Creates a crawler with an empty frontier and an empty visited set. */
+    public crawler() {
+        url_queue = new LinkedList<>();
+        visited_urls = new HashSet<>();
+    }
+
+    /**
+     * Appends a URL to the end of the frontier.
+     *
+     * @param url the URL to schedule
+     */
+    public void add_to_queue(String url) {
+        url_queue.add(url);
+    }
+
+    /**
+     * Drains the frontier, moving each queued URL into the visited set.
+     *
+     * <p>Returns immediately when the frontier is empty. The frontier is processed with a
+     * loop rather than recursion, so call-stack depth does not grow with the number of
+     * queued URLs.
+     *
+     * @param num_sites count of sites supplied by the caller; not consulted yet
+     */
+    public void crawl(int num_sites) {
+        // Checking isEmpty() first avoids the NoSuchElementException remove() throws
+        // on an empty queue.
+        while (!url_queue.isEmpty()) {
+            String cur_site = url_queue.remove();
+            visited_urls.add(cur_site);
+
+            /*
+             TODO:
+             - browse through URLS using jsoup
+             - properly interpret robots.txt
+             - compress + store text metadata
+             */
+        }
+    }
+
+    /**
+     * Prints a header line followed by every visited URL, one per line, to standard
+     * output. The URLs come from a {@link HashSet}, so their order is unspecified.
+     */
+    public void get_visited() {
+        System.out.println("All of the visited websites:");
+        for (String url : visited_urls) {
+            System.out.println(url);
+        }
+    }
+}
diff --git a/Crawler/target/classes/org/example/crawler.class b/Crawler/target/classes/org/example/crawler.class
new file mode 100644
index 0000000000000000000000000000000000000000..09db5d9476171e5d22d4497c78b8a11a6d51bf68
Binary files /dev/null and b/Crawler/target/classes/org/example/crawler.class differ