File size: 5,110 Bytes
64e9ead |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
from langchain.tools import BaseTool
from tool.chemspace import ChemSpace
import pandas as pd
from utils import (
is_multiple_smiles,
is_smiles,
pubchem_query2smiles,
query2cas,
smiles2name,
)
class Query2CAS(BaseTool):
    """Tool that maps a molecule (name or SMILES) to its CAS number.

    The CAS lookup itself is delegated to ``query2cas`` using the two PubChem
    URL templates stored on the instance.
    """

    name: str = "Mol2CAS"
    description: str = "Input molecule (name or SMILES), returns CAS number."
    # PubChem URL templates; filled in by __init__.
    url_cid: str = None
    url_data: str = None

    def __init__(self):
        super().__init__()
        self.url_cid = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/{}/{}/cids/JSON"
        self.url_data = "https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/{}/JSON"

    def _run(self, query: str) -> str:
        """Return the CAS number for *query*, or an error message.

        Args:
            query: Molecule name or SMILES string.

        Returns:
            The value produced by ``query2cas``. If either lookup raises
            ``ValueError`` the exception text is returned instead; a
            ``ValueError`` raised anywhere else yields "CAS number not found".
        """
        try:
            input_is_smiles = is_smiles(query)

            try:
                cas_number = query2cas(query, self.url_cid, self.url_data)
            except ValueError as lookup_error:
                return str(lookup_error)

            if not input_is_smiles:
                # The resolved SMILES is not used; the call only matters in
                # that a ValueError here replaces the CAS with an error text.
                try:
                    pubchem_query2smiles(cas_number, None)
                except ValueError as smiles_error:
                    return str(smiles_error)

            return cas_number
        except ValueError:
            return "CAS number not found"

    async def _arun(self, query: str) -> str:
        """Asynchronous execution is not supported by this tool."""
        raise NotImplementedError()
class Query2SMILES(BaseTool):
    """Tool that resolves a molecule query to a SMILES string.

    The primary lookup goes through ``pubchem_query2smiles``. If that fails
    and a ChemSpace API key was supplied at construction time, ChemSpace is
    tried as a fallback.
    """

    name: str = "CAS2SMILES"
    description: str = "Input a CAS number, returns SMILES."
    # PubChem URL template; filled in by __init__.
    url: str = None
    # Optional key enabling the ChemSpace fallback.
    chemspace_api_key: str = None

    def __init__(self, chemspace_api_key: str = None):
        super().__init__()
        self.chemspace_api_key = chemspace_api_key
        self.url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/{}"

    def _run(self, query: str) -> str:
        """Query the given molecule and return a SMILES string from the record.

        Useful to get the SMILES string of one molecule by searching for it.
        Only query with one specific molecule at a time.

        Args:
            query: Identifier of a single molecule.

        Returns:
            The SMILES string on success. On failure, a human-readable
            message: either the multiple-SMILES warning or the text of the
            exception raised by the PubChem lookup.
        """
        if is_smiles(query) and is_multiple_smiles(query):
            return "Multiple SMILES strings detected, input one molecule at a time."

        try:
            return pubchem_query2smiles(query, self.url)
        except Exception as e:
            # Without an API key there is no fallback. The previous code
            # referenced an undefined ``chemspace`` object on this path and
            # only returned the error by way of a swallowed NameError.
            if not self.chemspace_api_key:
                return str(e)
            try:
                chemspace = ChemSpace(self.chemspace_api_key)
                smi = chemspace.convert_mol_rep(query, "smiles")
                # Keep the part after the first ':' of the ChemSpace reply.
                return smi.split(":")[1]
            except Exception:
                # Report the original PubChem failure, not the fallback's.
                return str(e)

    async def _arun(self, query: str) -> str:
        """Use the tool asynchronously."""
        raise NotImplementedError()
class Mol2SMILES(BaseTool):
    """Tool that resolves a molecule name to a SMILES string.

    The primary lookup goes through ``pubchem_query2smiles``. If it fails,
    the local ``tool/dataset.csv`` file is searched for an exact match on
    its ``Name`` column.
    """

    name: str = "Mol2SMILES"
    description: str = "Input a molecular name , returns SMILES."

    def __init__(self, chemspace_api_key: str = None):
        # ``chemspace_api_key`` is accepted but not used by this tool; it is
        # kept so existing callers that pass it keep working.
        super().__init__()

    def _run(self, query: str) -> str:
        """Query the given molecule name and return a SMILES string.

        Useful to get the SMILES string of one molecule by searching the name
        of a molecule. Only query with one specific name.

        Args:
            query: Name of a single molecule.

        Returns:
            The SMILES string on success. On failure, a human-readable
            message: either the multiple-SMILES warning or the text of the
            exception raised by the PubChem lookup.
        """
        if is_smiles(query) and is_multiple_smiles(query):
            return "Multiple SMILES strings detected, input one molecule at a time."

        try:
            return pubchem_query2smiles(query)
        except Exception as e:
            try:
                csv_data = pd.read_csv("tool/dataset.csv", encoding="ISO-8859-1")
                relevant_rows = csv_data[csv_data["Name"] == query]
                if not relevant_rows.empty:
                    # Several rows may match; the first one is returned.
                    return relevant_rows.iloc[0]["SMILES"]
            except Exception:
                # Dataset missing or malformed: report the PubChem failure.
                return str(e)
            # No matching row. Previously this path fell off the end of the
            # method and returned None instead of an error message.
            return str(e)

    async def _arun(self, query: str) -> str:
        """Use the tool asynchronously."""
        raise NotImplementedError()
class SMILES2Name(BaseTool):
    """Tool that maps a SMILES string to a molecule name via ``smiles2name``."""

    name: str = "SMILES2Name"
    description: str = "Input SMILES, returns molecule name."

    def __init__(self):
        super().__init__()

    def _run(self, query: str) -> str:
        """Return the name of the molecule described by *query*.

        Args:
            query: A SMILES string. Input that is not recognised as SMILES is
                first passed through the ``Query2SMILES`` tool.

        Returns:
            The molecule name, or ``"Error: <message>"`` if any step raises.
        """
        try:
            if not is_smiles(query):
                try:
                    query2smiles = Query2SMILES()
                    query = query2smiles.run(query)
                except Exception as err:
                    # Was a bare ``except:``, which also converted
                    # KeyboardInterrupt/SystemExit into this ValueError.
                    raise ValueError(
                        "Invalid molecule input, no Pubchem entry"
                    ) from err
                # NOTE(review): Query2SMILES._run returns an error message
                # rather than raising on a failed lookup, so ``query`` may be
                # an error text at this point - confirm smiles2name rejects it.
            name = smiles2name(query)
            return name
        except Exception as e:
            return "Error: " + str(e)

    async def _arun(self, query: str) -> str:
        """Use the tool asynchronously."""
        raise NotImplementedError()
|