# TeLLAgent/tool/converters.py
# jinysun's picture
# Upload 46 files
# 64e9ead verified
from langchain.tools import BaseTool
from tool.chemspace import ChemSpace
import pandas as pd
from utils import (
is_multiple_smiles,
is_smiles,
pubchem_query2smiles,
query2cas,
smiles2name,
)
class Query2CAS(BaseTool):
    """Tool that maps a molecule (name or SMILES) to its CAS number.

    The lookup itself is delegated to ``query2cas``, which receives the two
    PubChem URL templates stored on the instance.
    """

    name: str = "Mol2CAS"
    description: str = "Input molecule (name or SMILES), returns CAS number."
    # PubChem PUG REST URL template for the compound-ID lookup; set in __init__.
    url_cid: str = None
    # PubChem PUG View URL template for the compound record; set in __init__.
    url_data: str = None

    def __init__(
        self,
    ):
        super().__init__()
        self.url_cid = (
            "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/{}/{}/cids/JSON"
        )
        self.url_data = (
            "https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/{}/JSON"
        )

    def _run(self, query: str) -> str:
        """Return the CAS number for *query*, or an error message.

        Args:
            query: Molecule name or SMILES string.

        Returns:
            Whatever ``query2cas`` returns for the query. If that helper
            raises ``ValueError``, the exception text is returned instead so
            the calling agent receives a readable message.
        """
        # Only the CAS lookup is performed. No SMILES round-trip is done on
        # the result, so a failure there cannot hide a CAS number that was
        # found, and each call issues no more requests than query2cas needs.
        try:
            return query2cas(query, self.url_cid, self.url_data)
        except ValueError as e:
            return str(e)

    async def _arun(self, query: str) -> str:
        """Use the tool asynchronously (not supported)."""
        raise NotImplementedError()
class Query2SMILES(BaseTool):
    """Tool that resolves a query string to a single SMILES string.

    PubChem is tried first. When that fails and a ChemSpace API key was
    supplied at construction time, ChemSpace is tried as a fallback.
    """

    name: str = "CAS2SMILES"
    description: str = "Input a CAS number, returns SMILES."
    # PubChem PUG REST URL template passed to pubchem_query2smiles; set in __init__.
    url: str = None
    # Optional ChemSpace API key; the fallback lookup is skipped when falsy.
    chemspace_api_key: str = None

    def __init__(self, chemspace_api_key: str = None):
        super().__init__()
        self.chemspace_api_key = chemspace_api_key
        self.url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/{}"

    def _run(self, query: str) -> str:
        """Query the given molecule and return a SMILES string from the record.

        Useful to get the SMILES string of one molecule by searching the name
        of a molecule. Only query with one specific name.

        Args:
            query: Identifier of exactly one molecule.

        Returns:
            The SMILES string on success. On failure a human-readable message
            is returned instead of raising: either the multiple-SMILES notice
            or the text of the PubChem exception.
        """
        if is_smiles(query) and is_multiple_smiles(query):
            return "Multiple SMILES strings detected, input one molecule at a time."

        try:
            return pubchem_query2smiles(query, self.url)
        except Exception as pubchem_error:
            # Without an API key there is no fallback service to consult, so
            # report the PubChem failure directly.
            if not self.chemspace_api_key:
                return str(pubchem_error)
            try:
                chemspace = ChemSpace(self.chemspace_api_key)
                smi = chemspace.convert_mol_rep(query, "smiles")
                # Keep the part after the first ":" of the ChemSpace reply.
                return smi.split(":")[1]
            except Exception:
                # The fallback failed too; the PubChem error is the one
                # surfaced to the caller.
                return str(pubchem_error)

    async def _arun(self, query: str) -> str:
        """Use the tool asynchronously (not supported)."""
        raise NotImplementedError()
class Mol2SMILES(BaseTool):
    """Tool that resolves a molecule name to a SMILES string.

    PubChem is tried first; when that fails, the local ``tool/dataset.csv``
    table is searched for an exact match on its ``Name`` column.
    """

    name: str = "Mol2SMILES"
    description: str = "Input a molecular name , returns SMILES."

    def __init__(self, chemspace_api_key: str = None):
        # chemspace_api_key is accepted but not used by this tool.
        super().__init__()

    def _run(self, query: str) -> str:
        """Query the given molecule name and return a SMILES string.

        Useful to get the SMILES string of one molecule by searching the name
        of a molecule. Only query with one specific name.

        Args:
            query: Name of exactly one molecule.

        Returns:
            The SMILES string from PubChem, or the ``SMILES`` value of the
            first matching row of the local dataset. If neither source yields
            a result, the text of the PubChem exception is returned.
        """
        if is_smiles(query) and is_multiple_smiles(query):
            return "Multiple SMILES strings detected, input one molecule at a time."

        try:
            return pubchem_query2smiles(query)
        except Exception as pubchem_error:
            try:
                csv_data = pd.read_csv("tool/dataset.csv", encoding="ISO-8859-1")
                relevant_rows = csv_data[csv_data["Name"] == query]
                if not relevant_rows.empty:
                    # Several rows may share a name; the first match is used.
                    return relevant_rows.iloc[0]["SMILES"]
            except Exception:
                # Dataset missing, unreadable, or lacking the expected
                # columns: fall back to reporting the PubChem failure.
                return str(pubchem_error)
            # The dataset loaded but holds no row for this name. Return the
            # PubChem error rather than falling through to an implicit None.
            return str(pubchem_error)

    async def _arun(self, query: str) -> str:
        """Use the tool asynchronously (not supported)."""
        raise NotImplementedError()
class SMILES2Name(BaseTool):
    """Tool that converts a SMILES string to a molecule name.

    Input that is not recognised as SMILES is first converted with
    ``Query2SMILES`` and the result is then passed to ``smiles2name``.
    """

    name: str = "SMILES2Name"
    description: str = "Input SMILES, returns molecule name."

    def __init__(self):
        super().__init__()

    def _run(self, query: str) -> str:
        """Return the molecule name for *query*.

        Args:
            query: A SMILES string, or another identifier that
                ``Query2SMILES`` can resolve to one.

        Returns:
            The name produced by ``smiles2name``, or a string starting with
            ``"Error: "`` followed by the exception text when any step fails.
        """
        try:
            if not is_smiles(query):
                try:
                    query = Query2SMILES().run(query)
                except Exception as exc:
                    raise ValueError(
                        "Invalid molecule input, no Pubchem entry"
                    ) from exc
                # Query2SMILES reports lookup failures by returning a message
                # string instead of raising, so the result is re-checked here
                # before it is treated as SMILES.
                if not is_smiles(query):
                    raise ValueError("Invalid molecule input, no Pubchem entry")
            return smiles2name(query)
        except Exception as e:
            return "Error: " + str(e)

    async def _arun(self, query: str) -> str:
        """Use the tool asynchronously (not supported)."""
        raise NotImplementedError()