Loading testcases/molecule_input.py +73 −13 Original line number Diff line number Diff line Loading @@ -96,20 +96,23 @@ def writeIsomorphic(mol_name, smiles): return 0 if __name__ == "__main__": START_CIN = 24524 STOP_CIN = 30000 INCREMENT = 100 # HH = 783 # H2 = 24523 # range(start, end, step) --> Change values for number of molecules required for indx in range(START_CIN,STOP_CIN,INCREMENT): def writeSMILES(smiles, mol_name, files): if files == 'B': if writeMolecule(mol_name, smiles) == 0: writeIsomorphic(mol_name, smiles) elif files == 'M': writeMolecule(mol_name, smiles) elif files == 'I': writeIsomorphic(mol_name, smiles) def writePubChem(start, end, files): MAXSTEP = 100 INCREMENT = min(end - start, MAXSTEP) for indx in range(start,end,INCREMENT): start_time = time.time() numbers = [str(i) for i in range(indx, indx + INCREMENT)] # if HH in numbers: # numbers.remove(HH) indexes = ",".join(numbers) # query from pubchem URL Loading @@ -123,19 +126,76 @@ if __name__ == "__main__": for chemical in page_text['PropertyTable']['Properties']: # check if desired keys are in the json if 'CanonicalSMILES' in chemical: # and 'Title' in chemical: # mol_name = chemical['Title'] if 'CanonicalSMILES' in chemical: mol_name = "molecule" + str(chemical['CID']) smiles = chemical['CanonicalSMILES'] # ignore Hydrogen Molecules if smiles == "[HH]": continue print("molecule "+ str(chemical['CID']) + ": " + mol_name + "\t" + "smiles: " + smiles) if files == 'B': if writeMolecule(mol_name, smiles) == 0: writeIsomorphic(mol_name, smiles) elif files == 'M': writeMolecule(mol_name, smiles) elif files == 'I': writeIsomorphic(mol_name, smiles) else: print("Failed to retrieve the page. Status code:", response.status_code) while (time.time() - start_time < 0.3): pass if __name__ == "__main__": # Welcome Message print("##################################################################################\n" "Welcome to the Molecule Input generator. Make sure this file is being run in the ./testcases directory.\n" "This script should be used in order to generate the required input files needed to test the main application. \n" "Any molecules with 1.5 order bonds will be ignored." "Inputs can be created using SMILES Strings (SS) or pulled from the PubChem Chemical API (PC). \n" "Both standard molecule and isomorphic molecule input files can be generated.\n" "Standard Molecules wil be placed in the ./molecules directory\n" "Isomorphic Molecules will be placed in the ./isomorphic_test directory\n" "##################################################################################\n") method = "" while True: method = input("Select an input method [SS/PC]: ").strip().upper() if method in ['SS', 'PC']: break else: print("ERROR: Invalid input. Please enter 'SS' for SMILES String or 'PC' for PubChem Chemical API.") inputType = "" while True: inputType = input("Select input types to create [M/I/B(default)]: ").strip().upper() if inputType in ['M', 'I']: break elif inputType == '': inputType = 'B' break else: print("ERROR: Invalid input type. Please enter 'M' for Molecule, 'I' for Isomorphic, 'B' for Both.") if method == 'SS': smiles = input("Enter the SMILES String: ") mol_name = input("Enter the name of the molecule: ") writeSMILES(smiles, mol_name, inputType) elif method == 'PC': while True: id_range_input = input('Enter the Chemical ID Range to grab in the format "start end": ').strip() try: start, end = map(int, id_range_input.split()) if start >= end: print("ERROR: The start value must be less than the end value. Please enter a valid range.") continue if start < 0: print("ERROR: Neither value can be less than zero. Please enter a valid range.") continue writePubChem(start, end + 1, inputType) break except ValueError: print("Invalid range format. Please enter two integers in the format 'start, end'.") Loading
testcases/molecule_input.py +73 −13 Original line number Diff line number Diff line Loading @@ -96,20 +96,23 @@ def writeIsomorphic(mol_name, smiles): return 0 if __name__ == "__main__": START_CIN = 24524 STOP_CIN = 30000 INCREMENT = 100 # HH = 783 # H2 = 24523 # range(start, end, step) --> Change values for number of molecules required for indx in range(START_CIN,STOP_CIN,INCREMENT): def writeSMILES(smiles, mol_name, files): if files == 'B': if writeMolecule(mol_name, smiles) == 0: writeIsomorphic(mol_name, smiles) elif files == 'M': writeMolecule(mol_name, smiles) elif files == 'I': writeIsomorphic(mol_name, smiles) def writePubChem(start, end, files): MAXSTEP = 100 INCREMENT = min(end - start, MAXSTEP) for indx in range(start,end,INCREMENT): start_time = time.time() numbers = [str(i) for i in range(indx, indx + INCREMENT)] # if HH in numbers: # numbers.remove(HH) indexes = ",".join(numbers) # query from pubchem URL Loading @@ -123,19 +126,76 @@ if __name__ == "__main__": for chemical in page_text['PropertyTable']['Properties']: # check if desired keys are in the json if 'CanonicalSMILES' in chemical: # and 'Title' in chemical: # mol_name = chemical['Title'] if 'CanonicalSMILES' in chemical: mol_name = "molecule" + str(chemical['CID']) smiles = chemical['CanonicalSMILES'] # ignore Hydrogen Molecules if smiles == "[HH]": continue print("molecule "+ str(chemical['CID']) + ": " + mol_name + "\t" + "smiles: " + smiles) if files == 'B': if writeMolecule(mol_name, smiles) == 0: writeIsomorphic(mol_name, smiles) elif files == 'M': writeMolecule(mol_name, smiles) elif files == 'I': writeIsomorphic(mol_name, smiles) else: print("Failed to retrieve the page. Status code:", response.status_code) while (time.time() - start_time < 0.3): pass if __name__ == "__main__": # Welcome Message print("##################################################################################\n" "Welcome to the Molecule Input generator. Make sure this file is being run in the ./testcases directory.\n" "This script should be used in order to generate the required input files needed to test the main application. \n" "Any molecules with 1.5 order bonds will be ignored." "Inputs can be created using SMILES Strings (SS) or pulled from the PubChem Chemical API (PC). \n" "Both standard molecule and isomorphic molecule input files can be generated.\n" "Standard Molecules wil be placed in the ./molecules directory\n" "Isomorphic Molecules will be placed in the ./isomorphic_test directory\n" "##################################################################################\n") method = "" while True: method = input("Select an input method [SS/PC]: ").strip().upper() if method in ['SS', 'PC']: break else: print("ERROR: Invalid input. Please enter 'SS' for SMILES String or 'PC' for PubChem Chemical API.") inputType = "" while True: inputType = input("Select input types to create [M/I/B(default)]: ").strip().upper() if inputType in ['M', 'I']: break elif inputType == '': inputType = 'B' break else: print("ERROR: Invalid input type. Please enter 'M' for Molecule, 'I' for Isomorphic, 'B' for Both.") if method == 'SS': smiles = input("Enter the SMILES String: ") mol_name = input("Enter the name of the molecule: ") writeSMILES(smiles, mol_name, inputType) elif method == 'PC': while True: id_range_input = input('Enter the Chemical ID Range to grab in the format "start end": ').strip() try: start, end = map(int, id_range_input.split()) if start >= end: print("ERROR: The start value must be less than the end value. Please enter a valid range.") continue if start < 0: print("ERROR: Neither value can be less than zero. Please enter a valid range.") continue writePubChem(start, end + 1, inputType) break except ValueError: print("Invalid range format. Please enter two integers in the format 'start, end'.")