Updated script to query from URL for Chemical name and SMILES representation (b737fda1) · Commits · EC504 Spring 2024 Group Projects / Group4

testcases/molecule_input.py

+56 −15

Original line number	Diff line number	Diff line
		from pysmiles import read_smiles
		import networkx as nx
		import random
		import requests
		import os

		# returns the order number of the edge that appears using nx.edge_list
		def getOrder(line):
		    """Return the integer bond order encoded in an edge-list line.

		    The line is expected to carry an attribute dict of the form
		    ``{'order': N}``, e.g. ``"0 1 {'order': 2}"``.

		    Returns:
		        The bond order as an int, or -1 when the ``{'order': `` marker
		        is missing or the value is not a plain non-negative integer
		        (for example ``1.5``).
		    """
		    # Split once on the marker; `marker` comes back empty when absent.
		    _, marker, tail = line.partition("{'order': ")
		    if not marker:
		        return -1

		    order = tail.rstrip('}\n')
		    # only allow integer bond order values; isdecimal() (unlike isdigit())
		    # accepts exactly the characters int() can parse
		    return int(order) if order.isdecimal() else -1

		# multiplies the edge pair by the order number
		def processContent(content):
		new_content = ""
		for line in content:
		# Check if the line contains bond order information
		# check if the line contains bond order information
		if "{'order':" in line:
		bond_order = getOrder(line)
		# if bond order is invalid, then return error
		if bond_order == -1:
		return -1

		# duplicate the edge for value of bond order
		vertex_pair = ' '.join(line.split()[:2])
		for i in range(bond_order):
		new_content += vertex_pair + '\n'
		@@ -28,7 +37,7 @@ def scrambleLabels(graph):
		random.shuffle(shuffled_nodes)
		new_map = dict(zip(nodes, shuffled_nodes))

		# Use the mapping to generate a new graph with shuffled labels
		# use the mapping to generate a new graph with shuffled labels
		scrambled_graph = nx.relabel_nodes(graph, new_map)
		return scrambled_graph

		@@ -45,15 +54,22 @@ def writeMolecule(mol_name, smiles):
		mol_data += node[1] + '\n'


		# Read the existing contents of the file
		# read the generated edgelist
		with open(file_path_mol, 'r') as file:
		edgelist = file.read()

		# if there's an invalid edgelist, remove it from the file system
		modified_edgelist = processContent(edgelist.split('\n'))
		# Write the new content at the beginning and the original content
		if modified_edgelist == -1:
		os.remove(file_path_mol)
		return -1

		# write metadata at the beginning and the original content
		with open(file_path_mol, 'w') as file:
		file.write(mol_data + modified_edgelist)

		return 0

		def writeIsomorphic(mol_name, smiles):
		iso_graph = scrambleLabels(read_smiles(smiles, explicit_hydrogen=True))
		file_path_iso = "./isomorphic_test/" + mol_name + "_iso.txt"
		@@ -70,16 +86,41 @@ def writeIsomorphic(mol_name, smiles):
		isolist = file.read()

		modified_isolist = processContent(isolist.split('\n'))
		if modified_isolist == -1:
		os.remove(file_path_iso)
		return -1

		with open(file_path_iso, 'w') as file:
		file.write(iso_data + modified_isolist)

		return 0

		if __name__ == "__main__":
		    # Query PubChem for compounds in batches of ten consecutive CIDs and,
		    # for each one, write a molecule file plus a label-scrambled
		    # isomorphic variant.
		    # range(start, end, step) --> Change values for number of molecules required
		    for indx in range(1, 30, 10):
		        indexes = ",".join(str(i) for i in range(indx, indx + 10))

		        # query from pubchem URL
		        url = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/' + indexes + '/property/Title,CanonicalSMILES/json'
		        # a timeout keeps the script from hanging on a stalled connection
		        response = requests.get(url, timeout=30)

		        # check if the request was successful
		        if response.status_code != 200:
		            print("Failed to retrieve the page. Status code:", response.status_code)
		            continue

		        # extract the json from the response
		        page_text = response.json()

		        for chemical in page_text['PropertyTable']['Properties']:
		            # skip entries that lack the desired keys
		            if 'Title' not in chemical or 'CanonicalSMILES' not in chemical:
		                continue

		            mol_name = chemical['Title']
		            smiles = chemical['CanonicalSMILES']

		            print("molecule "+ str(chemical['CID']) + ": " + mol_name + "\t" + "smiles: " + smiles)

		            # only generate the isomorphic variant when the molecule
		            # itself was written successfully (writeMolecule returns 0)
		            if writeMolecule(mol_name, smiles) == 0:
		                writeIsomorphic(mol_name, smiles)


		No newline at end of file