Commit 8e915a1b authored by Hyun Soo  Kim's avatar Hyun Soo Kim
Browse files

Merge branch '9-improved-benchmarking' into 'master'

Resolve "Improved benchmarking"

Closes #9

See merge request ec504/ec504_projects/group4!13
parents 38c8e8da 1abfa744
Loading
Loading
Loading
Loading
+40 −3
Original line number Diff line number Diff line
@@ -4,12 +4,15 @@ import java.io.*;
import java.net.ConnectException;
import java.net.ServerSocket;
import java.net.Socket;
import java.nio.file.*;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;

public class Main {

    static MoleculeDatabase moleculeDb = null;
    static boolean verbose = false;
    static int MINUTE = 60 * 1000;

    public static void initDb(String dbName) throws IOException {
        // Load the database
@@ -18,6 +21,8 @@ public class Main {
        if (dbFile.exists()) {
            moleculeDb.load(dbName);
        }
        moleculeDb.name = dbName;
        ProteinFactory.initAminoAcids();
    }

    public static void printVerbose(String s) {
@@ -26,11 +31,14 @@ public class Main {
        }
    }

    public static void commandHandler1(String cmd) {
    public static void commandHandler1(String cmd) throws IOException {
        switch (cmd) {
            case "--printDb":
                moleculeDb.printDb();
                break;
            case "--printName":
                System.out.println(moleculeDb.name);
                break;
            case "--verbose":
                if (verbose) {
                    System.out.println("verbose: true -> false");
@@ -40,17 +48,46 @@ public class Main {
                verbose = !verbose;
                moleculeDb.verbose = verbose;
                break;
            case "--makeManySimple":
                ProteinFactory.manySimpleProteins();
                break;
            case "--makeFewComplex":
                ProteinFactory.fewComplexProteins();
                break;
            case "--marco":
                System.out.println("polo");
                break;
            default:
                printVerbose("unrecognized command: " + cmd);
                break;
        }
    }

    public static void commandHandler2(String cmd, String moleculePath) {
    /**
     * Recursively walks {@code proteinPath} and adds every non-directory entry found
     * to {@code moleculeDb} as a {@link Molecule}.
     *
     * <p>The paths handed to the visitor by {@link Files#walkFileTree} already start
     * with the root that was passed in, so each file is loaded from its own path
     * as-is. Prefixing it with {@code proteinPath} again would duplicate the root and
     * only resolve correctly for a few relative roots by coincidence.
     *
     * @param proteinPath root directory (or single file) to import
     * @throws IOException if the file tree cannot be traversed
     */
    public static void addProteins(String proteinPath) throws IOException {
        Files.walkFileTree(Paths.get(proteinPath), new SimpleFileVisitor<>() {
            @Override
            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) {
                // Skip anything that resolves to a directory (e.g. a symlinked folder).
                if (!Files.isDirectory(file)) {
                    moleculeDb.addMolecule(new Molecule(file.toString()));
                }
                return FileVisitResult.CONTINUE;
            }
        });
    }

    public static void commandHandler2(String cmd, String moleculePath) throws IOException {
        switch (cmd) {
            case "--addMolecule":
                moleculeDb.addMolecule(new Molecule(moleculePath));
                break;
            case "--addProteins":
                try {
                    addProteins(moleculePath);
                } catch (Exception e) {
                    System.err.println(e.getMessage());
                }
                break;
            case "--findMolecule":
                Molecule molecule = moleculeDb.findMolecule(new Molecule(moleculePath));
                if (molecule == null) {
@@ -180,7 +217,7 @@ public class Main {
        } catch (ConnectException e) {
            // If a client connection fails, run the server side of the program
            ServerSocket serverSocket = new ServerSocket(PORT_NUMBER);
            serverSocket.setSoTimeout(60 * 1000);
            serverSocket.setSoTimeout(5 * MINUTE);

            // Set the default filename for the database
            String dbName = "molecule.db";
+2 −22
Original line number Diff line number Diff line
@@ -16,6 +16,8 @@ public class MoleculeDatabase {

    public boolean verbose = false;

    String name;

    public void printVerbose(String s) {
        if (verbose) {
            System.out.println(s);
@@ -117,28 +119,6 @@ public class MoleculeDatabase {
        return similar;
    }

    /**
     * Collects every stored molecule for which
     * {@code isSubGraphPresent(molecule)} returns a non-null result.
     *
     * <p>Only buckets of {@code db} whose key is at least the query's atom count
     * are searched. The name of each match is also printed to standard output.
     *
     * @param molecule the subgraph to look for
     * @return list of stored molecules that contain the subgraph (possibly empty)
     */
    public ArrayList<Molecule> findSubgraph(Molecule molecule) {
        ArrayList<Molecule> matches = new ArrayList<>();
        final int minAtoms = molecule.getNumAtoms();

        for (int bucketKey : db.keySet()) {
            // Buckets keyed below the query's atom count are not searched.
            if (bucketKey < minAtoms) {
                continue;
            }
            for (Molecule candidate : db.get(bucketKey)) {
                if (candidate.isSubGraphPresent(molecule) == null) {
                    continue;
                }
                matches.add(candidate);
                System.out.println(candidate.moleculeName);
            }
        }

        return matches;
    }

    /**
     * Download Molecules from PubChem in range [start, end]
     */
+307 −0
Original line number Diff line number Diff line
import java.io.*;
import java.util.ArrayList;

/**
 * Generates synthetic "protein" molecule files for benchmarking by chaining
 * amino acid templates loaded from {@code src/amino_acid/*.txt}.
 *
 * <p>Atoms are addressed by 0-based index into a template's atom buffer, one
 * character per atom (assumes every atom line in a template file is a single
 * character; TODO confirm against the template files). When residues are joined,
 * some terminal atoms are dropped and all remaining indices are shifted down
 * accordingly.
 */
public class ProteinFactory {

    public ProteinFactory() {
    }

    static final AminoAcid ALANINE;
    static final AminoAcid CYSTEINE;
    static final AminoAcid GLYCINE;
    static final AminoAcid LEUCINE;
    static final AminoAcid ISOLEUCINE;
    static final AminoAcid METHIONINE;
    static final AminoAcid PROLINE;
    static final AminoAcid SERINE;
    static final AminoAcid THREONINE;
    static final AminoAcid VALINE;

    static {
        try {
            // Arguments after the path: terminusC, terminusHN, terminusHO, terminusN, terminusO.
            ALANINE = new AminoAcid("src/amino_acid/Alanine.txt",
                    2, 12, 10, 5, 4);
            CYSTEINE = new AminoAcid("src/amino_acid/Cysteine.txt",
                    2, 12, 10, 5, 4);
            GLYCINE = new AminoAcid("src/amino_acid/Glycine.txt",
                    1, 9, 7, 4, 3);
            LEUCINE = new AminoAcid("src/amino_acid/Leucine.txt",
                    5, 21, 19, 8, 7);
            ISOLEUCINE = new AminoAcid("src/amino_acid/l-Isoleucine.txt",
                    5, 21, 19, 8, 7);
            METHIONINE = new AminoAcid("src/amino_acid/Methionine.txt",
                    5, 19, 17, 8, 7);
            PROLINE = new AminoAcid("src/amino_acid/Proline.txt",
                    5, 13, 16, 3, 7);
            SERINE = new AminoAcid("src/amino_acid/Serine.txt",
                    2, 12, 10, 5, 4);
            THREONINE = new AminoAcid("src/amino_acid/Threonine.txt",
                    3, 15, 13, 6, 5);
            VALINE = new AminoAcid("src/amino_acid/Valine.txt",
                    4, 18, 16, 7, 6);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        // Assign the one-letter symbols eagerly: protein names are built from them,
        // and entry points that never call initAminoAcids() (such as main below)
        // would otherwise name every protein "nullnull...".
        initAminoAcids();
    }

    /**
     * Assigns the one-letter symbol of every amino acid template.
     * Idempotent; also invoked automatically when the class is initialized.
     */
    static public void initAminoAcids() {
        ALANINE.symbol1 = "A";
        CYSTEINE.symbol1 = "C";
        GLYCINE.symbol1 = "G";
        ISOLEUCINE.symbol1 = "I";
        LEUCINE.symbol1 = "L";
        METHIONINE.symbol1 = "M";
        PROLINE.symbol1 = "P";
        SERINE.symbol1 = "S";
        THREONINE.symbol1 = "T";
        VALINE.symbol1 = "V";
    }

    /** Number of distinct templates selectable through {@link #getAminoAcid(int)}. */
    static final int AMINO_ACID_COUNT = 10;

    /** Returns true when {@code index} is one of the {@code removed} atom indices. */
    private static boolean isRemoved(int index, int... removed) {
        for (int r : removed) {
            if (index == r) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns {@code index} shifted down by the number of removed atoms that sat below it.
     * Every comparison uses the original index, so the result does not depend on
     * the order in which the removed indices are listed.
     */
    private static int shiftIndex(int index, int... removed) {
        int shift = 0;
        for (int r : removed) {
            if (index > r) {
                shift++;
            }
        }
        return index - shift;
    }

    /** Appends the template's atoms to {@code target}, skipping the removed indices. */
    private static void appendAtoms(StringBuffer target, AminoAcid aminoAcid, int... removed) {
        for (int atomIdx = 0; atomIdx < aminoAcid.atomCount; atomIdx++) {
            if (!isRemoved(atomIdx, removed)) {
                target.append(aminoAcid.atomBuffer.charAt(atomIdx));
            }
        }
    }

    /**
     * Appends the template's bonds to {@code target}, dropping bonds that touch a removed
     * atom and renumbering the rest (shifted for removals, then offset by {@code baseIdx}).
     */
    private static void appendBonds(
            ArrayList<String> target, AminoAcid aminoAcid, int baseIdx, int... removed) {
        for (int bondIdx = 0; bondIdx < aminoAcid.bondCount; bondIdx++) {
            int atom0 = aminoAcid.bondList.get(2 * bondIdx);
            int atom1 = aminoAcid.bondList.get(2 * bondIdx + 1);
            if (isRemoved(atom0, removed) || isRemoved(atom1, removed)) {
                continue;
            }
            target.add(String.format("%d %d",
                    baseIdx + shiftIndex(atom0, removed),
                    baseIdx + shiftIndex(atom1, removed)));
        }
    }

    /**
     * An amino acid template read from a text file laid out as: a name line, an
     * atom-count line, one line per atom, then one {@code "a b"} bond line per bond
     * until end of file.
     */
    static public class AminoAcid {
        int atomCount;
        StringBuffer atomBuffer;
        int bondCount = 0;
        // Flattened bond endpoints: bond i joins bondList[2i] and bondList[2i + 1].
        ArrayList<Integer> bondList = new ArrayList<>();
        String name;
        String symbol1;
        // terminusHN, terminusHO and terminusO in ascending order.
        ArrayList<Integer> termini = new ArrayList<>();
        int terminusC;
        int terminusHN;
        int terminusHO;
        int terminusN;
        int terminusO;

        /**
         * Loads a template from {@code filePath} and records its terminal atom indices.
         *
         * @param filePath   template file to read
         * @param terminusC  index of the carbon that bonds to the next residue
         * @param terminusHN index of the hydrogen dropped when this residue is appended
         * @param terminusHO index of the hydrogen dropped when a residue is appended after this one
         * @param terminusN  index of the nitrogen that bonds to the previous residue
         * @param terminusO  index of the oxygen dropped when a residue is appended after this one
         * @throws IOException if the file cannot be read or ends before all declared atoms are read
         */
        public AminoAcid(
                String filePath, int terminusC, int terminusHN, int terminusHO, int terminusN, int terminusO)
                throws IOException {
            try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) {
                name = reader.readLine();
                String countLine = reader.readLine();
                if (name == null || countLine == null) {
                    throw new IOException("truncated amino acid file (missing header): " + filePath);
                }
                atomCount = Integer.parseInt(countLine.trim());
                atomBuffer = new StringBuffer(atomCount);
                for (int atomIdx = 0; atomIdx < atomCount; atomIdx++) {
                    String atom = reader.readLine();
                    if (atom == null) {
                        // Appending a null line would silently insert the text "null".
                        throw new IOException("truncated amino acid file (expected " + atomCount
                                + " atoms, found " + atomIdx + "): " + filePath);
                    }
                    atomBuffer.append(atom);
                }
                String bond;
                while ((bond = reader.readLine()) != null) {
                    if (bond.isBlank()) {
                        continue; // tolerate blank/trailing lines
                    }
                    String[] atoms = bond.split(" ");
                    bondList.add(Integer.parseInt(atoms[0]));
                    bondList.add(Integer.parseInt(atoms[1]));
                    bondCount++;
                }
            }
            termini.add(terminusHN);
            termini.add(terminusHO);
            termini.add(terminusO);
            termini.sort(null); // null comparator = natural (ascending) order
            this.terminusC = terminusC;
            this.terminusHN = terminusHN;
            this.terminusHO = terminusHO;
            this.terminusN = terminusN;
            this.terminusO = terminusO;
        }
    }

    /** A growing chain of residues; once {@code complete} is set no more can be added. */
    static public class Protein {
        Integer atomCount;
        StringBuffer atomBuffer = new StringBuffer();
        ArrayList<String> bondList = new ArrayList<>();
        boolean complete = false;
        String name;
        // Index (within this protein) of the carbon the next residue bonds to.
        int terminusC;

        /**
         * Starts a chain from {@code aminoAcid}, dropping its {@code terminusHO} and
         * {@code terminusO} atoms and renumbering the remaining atoms and bonds.
         *
         * @param aminoAcid template for the first residue
         */
        public Protein(AminoAcid aminoAcid) {
            name = aminoAcid.symbol1;

            int terminusHO = aminoAcid.terminusHO;
            int terminusO = aminoAcid.terminusO;

            appendAtoms(atomBuffer, aminoAcid, terminusHO, terminusO);
            atomCount = aminoAcid.atomCount - 2;
            appendBonds(bondList, aminoAcid, 0, terminusHO, terminusO);
            terminusC = shiftIndex(aminoAcid.terminusC, terminusHO, terminusO);
        }

        /**
         * Writes this protein to {@code folderName/name.txt} in the same layout the
         * templates use: name, atom count, one atom per line, one bond per line.
         *
         * @param folderName existing directory to write into
         * @throws IOException if the file cannot be written
         */
        public void write(String folderName) throws IOException {
            // try-with-resources so the file handle is released even if a write fails.
            try (BufferedWriter writer =
                         new BufferedWriter(new FileWriter(folderName + "/" + name + ".txt"))) {
                writer.write(name + "\n");
                writer.write(atomCount.toString());
                writer.write("\n");
                for (int ii = 0; ii < atomCount; ii++) {
                    writer.write(atomBuffer.charAt(ii) + "\n");
                }
                for (String b : bondList) {
                    writer.write(b + "\n");
                }
            }
        }
    }

    /**
     * Appends one residue to {@code protein}, bonding the chain's current
     * {@code terminusC} to the new residue's {@code terminusN}.
     *
     * <p>The new residue always loses its {@code terminusHN} atom. Unless it is the
     * last residue it also loses {@code terminusHO} and {@code terminusO}, and its
     * {@code terminusC} becomes the chain's new bonding carbon. Atoms are removed
     * by index, and every index that is reused (bond endpoints, {@code terminusN},
     * {@code terminusC}) is shifted for the removed atoms below it so that atoms
     * and bonds stay consistent.
     *
     * @param protein   chain to extend; left untouched (with a message printed) if already complete
     * @param aminoAcid template of the residue to append
     * @param isLast    true to close the chain with this residue
     */
    public static void addAminoAcid(Protein protein, AminoAcid aminoAcid, boolean isLast) {
        if (protein.complete) {
            System.out.println("cannot add to completed protein");
            return;
        }
        protein.name += aminoAcid.symbol1;

        int[] removed = isLast
                ? new int[] {aminoAcid.terminusHN}
                : new int[] {aminoAcid.terminusHN, aminoAcid.terminusHO, aminoAcid.terminusO};
        int baseIdx = protein.atomCount;

        appendAtoms(protein.atomBuffer, aminoAcid, removed);
        protein.atomCount += aminoAcid.atomCount - removed.length;

        // Bond joining the previous residue's carbon to this residue's nitrogen.
        protein.bondList.add(String.format("%d %d",
                protein.terminusC, baseIdx + shiftIndex(aminoAcid.terminusN, removed)));
        appendBonds(protein.bondList, aminoAcid, baseIdx, removed);

        if (isLast) {
            protein.complete = true;
        } else {
            protein.terminusC = baseIdx + shiftIndex(aminoAcid.terminusC, removed);
        }
    }

    /**
     * Maps an id to a template; any id outside 1..9 (including negatives) yields GLYCINE.
     * The trailing numbers are carried over from the original source (presumably atom counts).
     */
    public static AminoAcid getAminoAcid(int id) {
        return switch (id) {
            case 9 -> LEUCINE;      // 22
            case 8 -> ISOLEUCINE;   // 22
            case 7 -> METHIONINE;   // 20
            case 6 -> VALINE;       // 19
            case 5 -> THREONINE;    // 17
            case 4 -> PROLINE;      // 17
            case 3 -> SERINE;       // 14
            case 2 -> CYSTEINE;     // 14
            case 1 -> ALANINE;      // 13
            default -> GLYCINE;     // 10
        };
    }

    /**
     * Builds a completed protein whose residues are chosen by the decimal digits of
     * {@code seed}, least significant digit first. Once the digits run out every
     * further residue is GLYCINE.
     *
     * @param seed     value whose digits select the residues
     * @param chainLen requested residue count; a chain always has at least two residues
     * @return the completed protein
     */
    public static Protein generateProtein(int seed, int chainLen) {
        Protein protein = new Protein(getAminoAcid(seed % AMINO_ACID_COUNT));
        for (int ii = 0; ii < chainLen - 2; ii++) {
            seed /= AMINO_ACID_COUNT;
            addAminoAcid(protein, getAminoAcid(seed % AMINO_ACID_COUNT), false);
        }
        seed /= AMINO_ACID_COUNT;
        addAminoAcid(protein, getAminoAcid(seed % AMINO_ACID_COUNT), true);
        return protein;
    }

    /**
     * Writes 100 folders of 100000 seven-residue proteins each under {@code ../simple},
     * using consecutive seeds starting at 0.
     *
     * @throws IOException if a protein file cannot be written
     */
    public static void manySimpleProteins() throws IOException {
        int simpleIdx = 0;
        File simpleDir = new File("../simple");
        for (int ii = 0; ii < 100; ii++) {
            System.out.println(ii + "% done generating simple proteins (< 137 atoms)");
            File simpleDirNested = new File(simpleDir, "simple" + ii);
            if (simpleDirNested.mkdirs()) {
                System.out.println("made new directory: " + simpleDirNested);
            }
            for (int jj = 0; jj < 100000; jj++) {
                Protein protein = generateProtein(simpleIdx, 7);
                protein.write(simpleDirNested.toString());
                simpleIdx++;
            }
        }
        System.out.println("done");
    }

    /**
     * Writes 10 folders of 1000 proteins with 1429 residues each under {@code ../complex}.
     * Each protein is renamed {@code protein<index>} before it is written.
     *
     * @throws IOException if a protein file cannot be written
     */
    public static void fewComplexProteins() throws IOException {
        int complexIdx = 0;
        File complexDir = new File("../complex");
        for (int ii = 0; ii < 10; ii++) {
            System.out.println(ii + "0% done generating complex proteins (>= 10006 atoms)");
            File complexDirNested = new File(complexDir, "complex" + ii);
            if (complexDirNested.mkdirs()) {
                System.out.println("made new directory: " + complexDirNested);
            }
            for (int jj = 0; jj < 1000; jj++) {
                Protein protein = generateProtein(complexIdx, 1429);
                protein.name = "protein" + complexIdx;
                protein.write(complexDirNested.toString());
                complexIdx++;
            }
        }
        System.out.println("done");
    }

    /** Standalone entry point: generates both benchmark data sets. */
    public static void main(String[] args) throws IOException {
        System.out.println("hello world");
        manySimpleProteins();
        fewComplexProteins();
        System.out.println("goodbye");
    }
}
+0 −132
Original line number Diff line number Diff line
@@ -216,138 +216,6 @@ public class Molecule implements Serializable {
        return similarity;
    }

    /**
     * Tests whether {@code subgraph} can be embedded in this molecule.
     *
     * <p>Works in three phases: cheap count-based rejections, construction of a
     * per-atom candidate list, then a traversal over the subgraph's atoms that
     * picks candidates and backs up to the parent node when a choice leaves a
     * neighbour without any candidate.
     *
     * <p>Side effect: resets and then modifies the {@code marked} flag on the atoms
     * of both this molecule and {@code subgraph}.
     *
     * @param subgraph the pattern molecule to search for
     * @return {@code this} if a match is found, otherwise {@code null}
     */
    public Molecule isSubGraphPresent(Molecule subgraph) {
        // Quick rejection: this molecule must have at least as many atoms of every element.
        for (int ii = 0; ii < numElements.length; ii++)
            if (this.numElements[ii] < subgraph.numElements[ii])
                return null;

        // Quick rejection: compare the number of edges.
        if (this.numEdges < subgraph.numEdges)
            return null;

        // Maps each subgraph atom to the atoms of this molecule that could stand in for it.
        HashMap<Atom, ArrayList<Atom>> CandidateList = new HashMap<>();

        // For each atom of this molecule, check it against every subgraph atom.
        for (Atom possibleCandidate : this.atomArrayList) {
            Object[] cndArray = possibleCandidate.connected.values().toArray();
            for (Atom keyAtom : subgraph.atomArrayList) {
                if (!keyAtom.equals(possibleCandidate)) {
                    if (keyAtom.elementType == possibleCandidate.elementType) {
                        if (keyAtom.degree <= possibleCandidate.degree) {
                            boolean validConnects = true;
                            // edgeMarked[aa] records that the candidate's aa-th connection is
                            // already paired with one of keyAtom's connections, so each
                            // candidate connection is used at most once.
                            boolean[] edgeMarked = new boolean[possibleCandidate.connected.size()];
                            for (Atom.ElemOrderPair keyValue : keyAtom.connected.values()) {
                                boolean edgeFound = false;
                                // Take the first unused candidate connection with the same
                                // element type and bond order.
                                for (int aa = 0; aa < edgeMarked.length; aa++) {
                                    if (!edgeMarked[aa] && keyValue.eType == ((Atom.ElemOrderPair) cndArray[aa]).eType && keyValue.bondOrder == ((Atom.ElemOrderPair) cndArray[aa]).bondOrder) {
                                        edgeMarked[aa] = true;
                                        edgeFound = true;
                                        break;
                                    }
                                }
                                if (!edgeFound) {
                                    validConnects = false;
                                }
                            }
                            if (validConnects) {
                                // Add the atom to keyAtom's candidate list, creating the list on first use.
                                if (!CandidateList.containsKey(keyAtom)) {
                                    CandidateList.put(keyAtom, new ArrayList<>());
                                }
                                CandidateList.get(keyAtom).add(possibleCandidate);
                            }
                        }
                    }
                }
            }
        }
        // At this point every atom matching a subgraph atom's type (element and connections)
        // has been added to that subgraph atom's candidate list.

        // If any subgraph atom has no candidates at all, no embedding exists.
        for (Atom sweep : subgraph.atomArrayList)
            if (!CandidateList.containsKey(sweep))
                return null;


        // Unmark all atoms (marked = visited).
        for (Atom cleanAtom : this.atomArrayList)
            cleanAtom.marked = false;
        for (Atom cleanAtom : subgraph.atomArrayList)
            cleanAtom.marked = false;

        // Traversal node: the parent node, the subgraph atom itself, and its remaining candidates.
        // The nodes are kept in a linked list in the order they were discovered.
        LinkedList<subGraphNode> subgraphTraversal = new LinkedList<subGraphNode>();
        // The head is the first subgraph atom (its parent is null).
        subgraphTraversal.addFirst(new subGraphNode(null, subgraph.atomArrayList.get(0)));
        for (Atom c : CandidateList.get(subgraph.atomArrayList.get(0))) {
            //c.marked = true;
            subgraphTraversal.get(0).options.add(c);
        }
        subgraphTraversal.getFirst().self.marked = true;
        // Index into subgraphTraversal of the node currently being expanded.
        int pointer = 0;
        while(true) {
            subGraphNode current = subgraphTraversal.get(pointer);

            if(current.options.isEmpty()) {
                // No options left for this node: move the pointer back to the parent and restart the cycle.
                // If there is no parent (this is the root), the search has failed.
                if(current.parent == null)
                    return null;
                else
                    pointer = subgraphTraversal.indexOf(current.parent);
            }
            else {
                boolean mustReverse = false;
                // Number of nodes appended during this step, so they can be undone on failure.
                int adjAdded = 0;
                // Remove the next option (i.e. choose it) and mark it as used.
                Atom path = current.options.remove(0);
                path.marked = true;
                // Add the not-yet-visited subgraph atoms adjacent to the current one.
                for(String k :current.self.connected.keySet()) {
                    for(Atom a: subgraph.atomArrayList) {
                        if(!a.marked && a.getName().equals(k)) {
                            a.marked = true;
                            subgraphTraversal.add(new subGraphNode(current,a));
                            adjAdded++;
                            // Give the new node its options: candidates that are unmarked
                            // and connected to the atom chosen for the parent.
                            ArrayList<Atom> aList = CandidateList.get(a);
                            boolean candFound = false;
                            for(Atom cand: aList) {
                                if(!cand.marked) { // not marked
                                    if(cand.connected.containsKey(path.getName())) {  // is connected
                                        subgraphTraversal.getLast().options.add(cand);
                                        candFound = true;
                                    }
                                }
                            }
                            // A neighbour with no usable candidate invalidates the chosen path.
                            if(!candFound)
                                mustReverse = true;
                            break;
                        }
                    }
                }
                if(mustReverse) {
                    // Undo this step: release the chosen atom and drop the nodes just appended.
                    path.marked = false;
                    for(int ff = 0; ff < adjAdded;ff++) {
                        subgraphTraversal.getLast().self.marked = false;
                        subgraphTraversal.remove(subgraphTraversal.getLast());
                    }
                }
                else{
                    // If the node just expanded is the last one in the list, nothing is left
                    // to expand and this molecule is reported as a match.
                    if(subgraphTraversal.getLast().equals(subgraphTraversal.get(pointer)))
                        return this;
                    pointer++;
                }
            }

        }

    }
    

    /**
     * Returns the molecule if it contains subgraph
     * @param subgraph
+125 −0

File added.

Preview size limit exceeded, changes collapsed.

Loading