molecular / molecule /name.py
ivanm151's picture
init
6796365
import requests
from rdkit import Chem
def get_pubchem_name(smiles):
    """Look up a compound name on PubChem for a SMILES string.

    Two PUG REST requests are made: the first resolves the SMILES to a
    PubChem CID (the ``IUPACName`` property response carries the CID), the
    second fetches the synonym list for that CID. The first synonym in the
    list is returned.

    Args:
        smiles: SMILES string of the compound to look up.

    Returns:
        The first synonym PubChem lists for the compound, or ``None`` when a
        request fails or times out, a response is not HTTP 200, or a
        response body does not have the expected JSON structure.
    """
    base = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
    timeout = 10  # seconds; without it a stalled connection blocks forever

    try:
        # Send the SMILES as a query argument rather than a path segment so
        # that requests percent-encodes characters such as '#', '/', '\\'
        # and '+', which would otherwise corrupt the URL.
        resp = requests.get(
            f"{base}/smiles/property/IUPACName/JSON",
            params={"smiles": smiles},
            timeout=timeout,
        )
        if resp.status_code != 200:
            return None
        cid = resp.json()['PropertyTable']['Properties'][0]['CID']

        resp = requests.get(f"{base}/cid/{cid}/synonyms/JSON", timeout=timeout)
        if resp.status_code != 200:
            return None
        return resp.json()['InformationList']['Information'][0]['Synonym'][0]
    except requests.RequestException:
        # Connection problems, timeouts, invalid responses: treat like any
        # other failed lookup instead of crashing the caller.
        return None
    except (KeyError, IndexError, TypeError, ValueError):
        # Body was not JSON or did not have the expected layout.
        return None
def generate_readable_name(smiles):
    """Build a name by concatenating the element symbols of a molecule's atoms.

    Args:
        smiles: SMILES string to parse with RDKit.

    Returns:
        The atom symbols joined in the order ``GetAtoms()`` yields them, or
        ``"UnknownMolecule"`` when RDKit cannot parse the SMILES.
    """
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is not None:
        # Example: C-C-O -> CCO
        return "".join(a.GetSymbol() for a in parsed.GetAtoms())
    return "UnknownMolecule"
def generate_short_signature(smiles, n=6):
    """Return a compact ``<symbols>-<atom count>`` signature for a molecule.

    Args:
        smiles: SMILES string to parse with RDKit.
        n: Upper bound passed to the slice that selects the leading atom
            symbols (default 6).

    Returns:
        The symbols selected by ``[:n]`` joined together, a hyphen, and the
        total number of atoms RDKit reports; ``"MolX"`` when the SMILES
        cannot be parsed.
    """
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is not None:
        symbols = [a.GetSymbol() for a in parsed.GetAtoms()]
        head = "".join(symbols[:n])
        total = len(symbols)
        return f"{head}-{total}"
    return "MolX"
def get_name(smiles):
    """Return a display name for the molecule described by a SMILES string.

    The PubChem name is preferred. When PubChem yields no name, the name is
    built locally from the atom symbols via ``generate_readable_name``.

    Args:
        smiles: SMILES string of the molecule.

    Returns:
        The PubChem name if one was found; otherwise the locally generated
        name; ``"UnknownMolecule"`` if local generation raises.
    """
    name = get_pubchem_name(smiles=smiles)
    if name is not None:
        # Previously this result was fetched and then never used as the
        # return value; return it instead of overwriting it.
        return name
    try:
        return generate_readable_name(smiles=smiles)
    except Exception:
        # Best-effort naming: any failure in the local fallback degrades to
        # the placeholder rather than propagating.
        return "UnknownMolecule"
if __name__ == '__main__':
    # Manual smoke run: print the result of every naming helper for one
    # sample SMILES string, in the order listed below.
    smiles = "CCC1:C:C:C(CCOC2:C:C:C(CC3SC(=O)NC3=O):C:C:2):N:C:1"
    demos = (
        get_name,
        generate_readable_name,
        generate_short_signature,
        get_pubchem_name,
    )
    for demo in demos:
        print(demo(smiles))