Commit 0626ebdb authored by Ari Trachtenberg
Browse files

broadened file check to look only at printable ASCII

parent ab055aca
Loading
Loading
Loading
Loading
+14 −2
Original line number Diff line number Diff line
@@ -99,8 +99,8 @@ public class Tester {
        // ... also based on http://www.exampledepot.com/egs/java.io/CopyFile.html
        String sf1, sf2;
        try {
            sf1 = readFile(file1); sf1 = sf1.replaceAll("[\n\r]","");
            sf2 = readFile(file2); sf2 = sf2.replaceAll("[\n\r]","");
            sf1 = cleanString(readFile(file1));
            sf2 = cleanString(readFile(file2));
        }
        catch (IOException e) {return false;} // any exceptions are treated as a lack of a match

@@ -108,6 +108,18 @@ public class Tester {

    }

    /**
     * Strips everything except printable ASCII from a string.
     *
     * <p>Only characters in the range U+0020 (space) through U+007E (tilde) are kept.
     * All non-ASCII characters and all ASCII control characters are removed; this
     * includes carriage return, line feed, tab and DEL, so the result never contains
     * line breaks.
     *
     * <p>Based on: https://howtodoinjava.com/regex/java-clean-ascii-text-non-printable-chars/
     *
     * @param str the text to clean; must not be {@code null}
     * @return {@code str} with every character outside U+0020..U+007E removed
     * @throws NullPointerException if {@code str} is {@code null}
     */
    private static String cleanString(String str) {
        // A single negated class is sufficient: non-ASCII code points, the ASCII control
        // characters (general category Cc, which contains CR, LF and TAB) and DEL all lie
        // outside \x20-\x7E, so no separate passes are needed for them.
        return str.replaceAll("[^\\x20-\\x7E]", "");
    }

    /**
     * Reads a file into a string
     * @param file1 the file to read