Commit dba7590a authored by Tristen Liu
Browse files

Implemented CLI for Molecule file generation script.

parent 1ca5b4ba
Loading
Loading
Loading
Loading
+73 −13
Original line number Diff line number Diff line
@@ -96,20 +96,23 @@ def writeIsomorphic(mol_name, smiles):
        
    return 0

if __name__ == "__main__":
    START_CIN = 24524
    STOP_CIN = 30000
    INCREMENT = 100
    # HH = 783
    # H2 = 24523

    # range(start, end, step) --> Change values for number of molecules required
    for indx in range(START_CIN,STOP_CIN,INCREMENT):
def writeSMILES(smiles, mol_name, files):
    """Write the requested input file(s) for a single SMILES string.

    Args:
        smiles: SMILES string handed to the writer functions.
        mol_name: Molecule name handed to the writer functions.
        files: Which files to produce: 'M' calls writeMolecule only, 'I'
            calls writeIsomorphic only, and 'B' calls writeMolecule and then
            writeIsomorphic, but only when writeMolecule returned 0.
            Any other value writes nothing.
    """
    if files in ('B', 'M'):
        status = writeMolecule(mol_name, smiles)
        # In 'B' mode the isomorphic file is only attempted after
        # writeMolecule reports 0 (presumably success - TODO confirm).
        if files == 'B' and status == 0:
            writeIsomorphic(mol_name, smiles)
    elif files == 'I':
        writeIsomorphic(mol_name, smiles)

def writePubChem(start, end, files):
    """Process PubChem compounds by CID in batches and write files for each one.

    NOTE(review): the lines that build and send the request (and that define
    ``response`` and ``page_text``) sit between two diff hunks and are not
    visible here, so this description only covers the visible lines.

    Args:
        start: First CID of the first batch.
        end: Exclusive upper bound for the batch start positions.
        files: 'B' calls writeMolecule and, if it returns 0, writeIsomorphic;
            'M' calls writeMolecule only; 'I' calls writeIsomorphic only.
            Any other value writes nothing.
    """
    MAXSTEP = 100
    # Batch size: the whole range when it is shorter than MAXSTEP, otherwise
    # MAXSTEP IDs per batch.
    INCREMENT = min(end - start, MAXSTEP)
    
    for indx in range(start,end,INCREMENT):
        start_time = time.time()

        # NOTE(review): every batch lists INCREMENT consecutive IDs, so when
        # (end - start) is larger than MAXSTEP and not a multiple of it, the
        # last batch includes IDs >= end - confirm this overshoot is intended.
        numbers = [str(i) for i in range(indx, indx + INCREMENT)]
        # if HH in numbers:
        #     numbers.remove(HH)
        # Comma-separated ID list for the batch.
        indexes = ",".join(numbers)
        
        # query from pubchem URL
@@ -123,19 +126,76 @@ if __name__ == "__main__":
            
            for chemical in page_text['PropertyTable']['Properties']:
                # check if desired keys are in the json
                if 'CanonicalSMILES' in chemical: # and 'Title' in chemical:
                    # mol_name = chemical['Title']
                if 'CanonicalSMILES' in chemical:
                    # File name is derived from the CID rather than a title.
                    mol_name = "molecule" + str(chemical['CID'])
                    smiles = chemical['CanonicalSMILES']
                    # ignore Hydrogen Molecules
                    if smiles == "[HH]":
                        continue
                    
                    print("molecule "+ str(chemical['CID']) + ": " +  mol_name + "\t" + "smiles: " + smiles)
                
                    # Same file-type dispatch as writeSMILES: 'B' writes the
                    # isomorphic file only when writeMolecule returned 0.
                    if files == 'B':
                        if writeMolecule(mol_name, smiles) == 0:
                            writeIsomorphic(mol_name, smiles)
                    elif files == 'M':
                        writeMolecule(mol_name, smiles)
                    elif files == 'I':
                        writeIsomorphic(mol_name, smiles)
        else:
            print("Failed to retrieve the page. Status code:", response.status_code)

        # Busy-wait until at least 0.3 s have elapsed since this batch began;
        # presumably a request throttle - TODO confirm. NOTE(review): this
        # loop spins the CPU while waiting.
        while (time.time() - start_time < 0.3):
            pass

if __name__ == "__main__":
    # Interactive entry point: asks for an input method (a single SMILES
    # string or a PubChem CID range) and for which file types to create,
    # then calls writeSMILES or writePubChem accordingly.

    # Welcome Message
    print("##################################################################################\n"
          "Welcome to the Molecule Input generator. Make sure this file is being run in the ./testcases directory.\n"
          "This script should be used in order to generate the required input files needed to test the main application. \n"
          "Any molecules with 1.5 order bonds will be ignored.\n"
          "Inputs can be created using SMILES Strings (SS) or pulled from the PubChem Chemical API (PC). \n"
          "Both standard molecule and isomorphic molecule input files can be generated.\n"
          "Standard Molecules will be placed in the ./molecules directory\n"
          "Isomorphic Molecules will be placed in the ./isomorphic_test directory\n"
          "##################################################################################\n")

    # Keep asking until one of the two supported methods is entered.
    while True:
        method = input("Select an input method [SS/PC]: ").strip().upper()
        if method in ('SS', 'PC'):
            break
        print("ERROR: Invalid input. Please enter 'SS' for SMILES String or 'PC' for PubChem Chemical API.")

    # Keep asking until a valid file type is entered. An empty answer selects
    # the default 'B', and an explicit 'B' is accepted too, matching what the
    # prompt advertises.
    while True:
        inputType = input("Select input types to create [M/I/B(default)]: ").strip().upper()
        if inputType == '':
            inputType = 'B'
        if inputType in ('M', 'I', 'B'):
            break
        print("ERROR: Invalid input type. Please enter 'M' for Molecule, 'I' for Isomorphic, 'B' for Both.")

    if method == 'SS':
        # Surrounding whitespace is never part of a SMILES string or a
        # molecule name, so drop anything typed or pasted by accident.
        smiles = input("Enter the SMILES String: ").strip()
        mol_name = input("Enter the name of the molecule: ").strip()
        writeSMILES(smiles, mol_name, inputType)
    elif method == 'PC':
        while True:
            id_range_input = input('Enter the Chemical ID Range to grab in the format "start end": ').strip()
            # Only the parsing is guarded: a ValueError raised later, while
            # downloading or writing, must not be reported as a bad range.
            try:
                start, end = map(int, id_range_input.split())
            except ValueError:
                print("Invalid range format. Please enter two integers in the format 'start end'.")
                continue
            if start >= end:
                print("ERROR: The start value must be less than the end value. Please enter a valid range.")
                continue
            if start < 0:
                print("ERROR: Neither value can be less than zero. Please enter a valid range.")
                continue
            # The user-facing range is inclusive; writePubChem treats its end
            # argument as exclusive, hence the + 1.
            writePubChem(start, end + 1, inputType)
            break