// NOTE(review): the line below is a flattened diff-viewer capture, not standalone Java source.
// It holds the text of two classes, Debug and RobotsTXT, surrounded by page residue
// ("Loading", file paths, "Original line number / Diff line number" headers).
// Because the capture is one physical line, everything after its first "//" is a
// line comment as far as a Java compiler is concerned. The text is kept byte-identical.
Loading Crawler/src/main/java/Debug.java 0 → 100644 +8 −0 Original line number Diff line number Diff line // Allows for quick and easy enabling/disabling of debug messages public class Debug { // Set to true to enable debug messages. Set to false to disable debug messages public static boolean DEBUG = false; public static boolean DEBUG_RobotsTXT = false; public static boolean DEBUG_Dutch = false; } Crawler/src/main/java/RobotsTXT.java 0 → 100644 +44 −0 Original line number Diff line number Diff line import java.util.ArrayList; // This class represents the robots.txt file for a given domain public class RobotsTXT { private String domain; private ArrayList<String> disallowedPaths; // Excluded paths private ArrayList<String> allowedPaths; // Exceptions to the excluded paths private int crawlDelay; // in seconds public RobotsTXT(String domain) { this.domain = domain; disallowedPaths = new ArrayList<String>(); allowedPaths = new ArrayList<String>(); crawlDelay = 1; // Default crawl delay is 1 second } public void setDomain(String domain) { this.domain = domain; } public String getDomain() { return domain; } public void addDisallowedPath(String path) { disallowedPaths.add(path); } public ArrayList<String> getDisallowedPaths() { return disallowedPaths; } public void addAllowedPath(String path) { allowedPaths.add(path); } public ArrayList<String> getAllowedPaths() { return allowedPaths; } public void setCrawlDelay(int delay) { crawlDelay = delay; } public int getCrawlDelay() { return crawlDelay; } } Loading
// File: Crawler/src/main/java/Debug.java

/**
 * Switchboard for debug output: allows quick and easy enabling/disabling
 * of debug messages.
 *
 * <p>Every flag is a public, non-final static, so it can be edited here or
 * reassigned from other code. Set a flag to {@code true} to enable the
 * corresponding debug messages and to {@code false} to disable them.
 */
public class Debug {

    /** General debug messages. Disabled by default. */
    public static boolean DEBUG = false;

    /**
     * Disabled by default.
     * NOTE(review): presumably gates robots.txt-related messages; confirm against usages.
     */
    public static boolean DEBUG_RobotsTXT = false;

    /**
     * Disabled by default.
     * NOTE(review): the area this flag covers is not visible in this file; confirm against usages.
     */
    public static boolean DEBUG_Dutch = false;
}
// File: Crawler/src/main/java/RobotsTXT.java
import java.util.ArrayList;

/**
 * Represents the robots.txt file for a given domain.
 *
 * <p>Holds the domain name, the excluded (disallowed) paths, the allowed
 * paths that act as exceptions to those exclusions, and the crawl delay in
 * seconds. This class only stores the values; it does not fetch or parse
 * anything itself.
 */
public class RobotsTXT {

    /** Crawl delay, in seconds, used until {@link #setCrawlDelay(int)} is called. */
    private static final int DEFAULT_CRAWL_DELAY_SECONDS = 1;

    private String domain;

    // The list instances are created once and never replaced, hence final.
    private final ArrayList<String> disallowedPaths = new ArrayList<String>(); // Excluded paths
    private final ArrayList<String> allowedPaths = new ArrayList<String>();    // Exceptions to the excluded paths

    private int crawlDelay = DEFAULT_CRAWL_DELAY_SECONDS; // in seconds

    /**
     * Creates an empty rule set for {@code domain} with no disallowed or
     * allowed paths and the default crawl delay of 1 second.
     *
     * @param domain the domain these rules belong to; stored as given
     */
    public RobotsTXT(String domain) {
        this.domain = domain;
    }

    /**
     * Replaces the stored domain.
     *
     * @param domain the new domain; stored as given
     */
    public void setDomain(String domain) {
        this.domain = domain;
    }

    /**
     * @return the domain passed to the constructor or the last
     *         {@link #setDomain(String)} call
     */
    public String getDomain() {
        return domain;
    }

    /**
     * Appends a path to the list of excluded paths.
     *
     * @param path the path to exclude
     * @throws NullPointerException if {@code path} is {@code null}
     */
    public void addDisallowedPath(String path) {
        disallowedPaths.add(requirePath(path));
    }

    /**
     * Returns the excluded paths in insertion order.
     *
     * @return the internal list itself (not a copy); changes made through the
     *         returned reference are visible to this object
     */
    public ArrayList<String> getDisallowedPaths() {
        return disallowedPaths;
    }

    /**
     * Appends a path to the list of exceptions to the excluded paths.
     *
     * @param path the path to allow
     * @throws NullPointerException if {@code path} is {@code null}
     */
    public void addAllowedPath(String path) {
        allowedPaths.add(requirePath(path));
    }

    /**
     * Returns the allowed paths in insertion order.
     *
     * @return the internal list itself (not a copy); changes made through the
     *         returned reference are visible to this object
     */
    public ArrayList<String> getAllowedPaths() {
        return allowedPaths;
    }

    /**
     * Sets the crawl delay.
     *
     * @param delay the delay in seconds; must not be negative
     * @throws IllegalArgumentException if {@code delay} is negative; the
     *         previously stored delay is left unchanged in that case
     */
    public void setCrawlDelay(int delay) {
        // A negative number of seconds is not a meaningful delay, so reject it
        // here instead of storing it silently.
        if (delay < 0) {
            throw new IllegalArgumentException("crawl delay must not be negative: " + delay);
        }
        crawlDelay = delay;
    }

    /**
     * @return the crawl delay in seconds
     */
    public int getCrawlDelay() {
        return crawlDelay;
    }

    /** Rejects null paths so the rule lists never contain null entries. */
    private static String requirePath(String path) {
        if (path == null) {
            throw new NullPointerException("path must not be null");
        }
        return path;
    }
}