Commit 220195ef authored by Hyun Soo Kim
Browse files

rename chemical_formula to molecular_formula

parent bd8408d6
Loading
Loading
Loading
Loading
+19 −15
Original line number Diff line number Diff line
@@ -6,9 +6,11 @@ import time
MIN_ISOMER_COUNT = 100

MAX_CID_SEGMENT_LEN = 4000  # maybe 3997
MAX_MOLECULE_COUNT = 1
MAX_MOLECULE_COUNT = 10_000_000

C_count_max = 10000
TITIN = "C169719H270466N45688O52238S911"

C_count_max = 100
C_count_min = C_count_max // 2

H_C_ratio_max = 48 / 25
@@ -47,18 +49,20 @@ if __name__ == "__main__":
    molecule_count = 0
    while molecule_count < MAX_MOLECULE_COUNT:

        chemical_formula = ""
        while chemical_formula in previous_formulae:
            chemical_formula = ""
        molecular_formula = ""
        while molecular_formula in previous_formulae:
            molecular_formula = ""
            C_count = random.randint(C_count_min, C_count_max)
            chemical_formula += "C" + str(C_count)
            chemical_formula += add_atom("H", C_count, H_C_ratio_max, H_C_ratio_min)
            chemical_formula += add_atom("N", C_count, N_C_ratio_max, N_C_ratio_min)
            chemical_formula += add_atom("O", C_count, O_C_ratio_max, O_C_ratio_min)
            molecular_formula += "C" + str(C_count)
            molecular_formula += add_atom("H", C_count, H_C_ratio_max, H_C_ratio_min)
            molecular_formula += add_atom("N", C_count, N_C_ratio_max, N_C_ratio_min)
            molecular_formula += add_atom("O", C_count, O_C_ratio_max, O_C_ratio_min)

        print(molecular_formula)

        url = (
            "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/fastformula/"
            + chemical_formula
            + molecular_formula
            + "/cids/JSON"
        )
        previous_time = wait_for_api_ready(previous_time)
@@ -69,15 +73,15 @@ if __name__ == "__main__":

        data = response.json()
        if "Fault" in data:
            print(chemical_formula + ": " + data["Fault"]["Message"])
            previous_formulae.append(chemical_formula)
            print(molecular_formula + ": " + data["Fault"]["Message"])
            previous_formulae.append(molecular_formula)
            continue

        cids = data["IdentifierList"]["CID"]
        isomer_count = len(cids)
        if isomer_count < MIN_ISOMER_COUNT:
            # print(chemical_formula + ": Has only " + str(isomer_count) + " isomers")
            previous_formulae.append(chemical_formula)
            # print(molecular_formula + ": Has only " + str(isomer_count) + " isomers")
            previous_formulae.append(molecular_formula)
            continue

        cid_segment = str(cids[0])
@@ -101,7 +105,7 @@ if __name__ == "__main__":
                for molecule_struct in data["PropertyTable"]["Properties"]:
                    if "CanonicalSMILES" in molecule_struct:
                        molecule_name = (
                            chemical_formula + "_" + str(molecule_struct["CID"])
                            molecular_formula + "_" + str(molecule_struct["CID"])
                        )
                        smiles = molecule_struct["CanonicalSMILES"]
                        if (