|
|
|
|
|
"""
|
|
|
Created on Thu Sep 5 21:42:51 2024
|
|
|
|
|
|
@author: BM109X32G-10GPU-02
|
|
|
"""
|
|
|
|
|
|
from langchain.tools import BaseTool
|
|
|
from rdkit import Chem
|
|
|
from rdkit.Chem import rdMolDescriptors
|
|
|
from rdkit.Chem import Descriptors
|
|
|
from utils import *
|
|
|
from rdkit.Chem import RDConfig
|
|
|
from rdkit.ML.Descriptors import MoleculeDescriptors
|
|
|
|
|
|
from rdkit.Contrib.SA_Score import sascorer
|
|
|
|
|
|
|
|
|
class MolSimilarity(BaseTool):
    """Tool that reports the Tanimoto similarity between two molecules.

    Both molecules arrive in a single string, joined by a ``'.'``.
    """

    name: str = "MolSimilarity"
    description: str = (
        "Input two molecule SMILES (separated by '.'), returns Tanimoto similarity."
    )

    def __init__(self):
        super().__init__()

    def _run(self, smiles_pair: str) -> str:
        """Describe how similar the two SMILES in ``smiles_pair`` are.

        Args:
            smiles_pair: Two SMILES strings separated by a single ``'.'``.

        Returns:
            An input-error message when splitting on ``'.'`` does not give
            exactly two parts; whatever string ``tanimoto`` returned, if it
            returned one; an "identical" error when the similarity equals 1;
            otherwise a sentence with the similarity rounded to 4 decimals.
        """
        parts = smiles_pair.split(".")
        if len(parts) != 2:
            return "Input error, please input two smiles strings separated by '.'"

        first, second = parts
        score = tanimoto(first, second)

        # A string result is handed back untouched (presumably an error
        # message produced by the helper -- confirm against utils.tanimoto).
        if isinstance(score, str):
            return score

        if score == 1:
            return "Error: Input Molecules Are Identical"

        return f"The Tanimoto similarity between {first} and {second} is {round(score, 4)}"

    async def _arun(self, smiles_pair: str) -> str:
        """Asynchronous entry point; not supported by this tool."""
        raise NotImplementedError()
|
|
|
|
|
|
|
|
|
class SMILES2Weight(BaseTool):
    """Tool that computes a molecule's weight from its SMILES string."""

    name: str = "SMILES2Weight"
    description: str = "Input SMILES, returns molecular weight."

    def __init__(self):
        super().__init__()

    def _run(self, smiles: str) -> str:
        """Return the weight of the molecule described by ``smiles``.

        The value comes from ``rdMolDescriptors.CalcExactMolWt``.

        Args:
            smiles: SMILES string of the molecule.

        Returns:
            ``"Invalid SMILES string"`` when RDKit cannot parse the input.
            NOTE(review): on success the numeric result of
            ``CalcExactMolWt`` is returned as-is, not as a string, even
            though the signature is annotated ``-> str``.
        """
        molecule = Chem.MolFromSmiles(smiles)
        return (
            "Invalid SMILES string"
            if molecule is None
            else rdMolDescriptors.CalcExactMolWt(molecule)
        )

    async def _arun(self, smiles: str) -> str:
        """Asynchronous entry point; not supported by this tool."""
        raise NotImplementedError()
|
|
|
|
|
|
class SMILES2LogP(BaseTool):
    """Tool that computes a molecule's LogP from its SMILES string."""

    name: str = "SMILES2LogP"
    description: str = "Input SMILES, returns molecular LogP."

    def __init__(self):
        super().__init__()

    def _run(self, smiles: str) -> str:
        """Return the LogP of the molecule described by ``smiles``.

        The value comes from ``Descriptors.MolLogP``.

        Args:
            smiles: SMILES string of the molecule.

        Returns:
            ``"Invalid SMILES string"`` when RDKit cannot parse the input.
            NOTE(review): on success the numeric result of ``MolLogP`` is
            returned as-is, not as a string, even though the signature is
            annotated ``-> str``.
        """
        parsed = Chem.MolFromSmiles(smiles)
        if parsed is not None:
            return Descriptors.MolLogP(parsed)
        return "Invalid SMILES string"

    async def _arun(self, smiles: str) -> str:
        """Asynchronous entry point; not supported by this tool."""
        raise NotImplementedError()
|
|
|
|
|
|
class SMILES2SAScore(BaseTool):
    """Tool that reports the synthetic accessibility score of a molecule."""

    name: str = "SMILES2SAScore"
    description: str = "Input SMILES, returns synthetic accessibility score to evaluate the difficulty of molecular synthesis."

    def __init__(self):
        super().__init__()

    def _run(self, smiles: str) -> str:
        """Return a sentence stating the SA score of ``smiles``.

        The score is computed by ``sascorer.calculateScore``.

        Args:
            smiles: SMILES string of the molecule.

        Returns:
            ``"Invalid SMILES string"`` when RDKit cannot parse the input,
            otherwise a sentence embedding the unrounded score.
        """
        molecule = Chem.MolFromSmiles(smiles)
        if molecule is None:
            return "Invalid SMILES string"

        sa_score = sascorer.calculateScore(molecule)
        return f"This SAScore of the molecule is {sa_score}."

    async def _arun(self, smiles: str) -> str:
        """Asynchronous entry point; not supported by this tool."""
        raise NotImplementedError()
|
|
|
|
|
|
class SMILES2Properties(BaseTool):
    """Tool that summarises basic descriptors of a molecule in one sentence."""

    name: str = "SMILES2Properties"
    description: str = "Input SMILES, returns basic physical and chemical properties."

    def __init__(self):
        super().__init__()

    def _run(self, smiles: str) -> str:
        """Return a one-line property summary for ``smiles``.

        Reports the SA score (``sascorer.calculateScore``) followed by nine
        RDKit descriptors computed through ``MolecularDescriptorCalculator``.
        The SA score, molecular weight, LogP and TPSA are formatted with two
        decimals; the remaining values are inserted as returned.

        Args:
            smiles: SMILES string of the molecule.

        Returns:
            ``"Invalid SMILES string"`` when RDKit cannot parse the input,
            otherwise the formatted summary sentence.
        """
        molecule = Chem.MolFromSmiles(smiles)
        if molecule is None:
            return "Invalid SMILES string"

        sa_score = sascorer.calculateScore(molecule)

        # The order here fixes the order of the values unpacked below.
        descriptor_names = [
            'MolWt',
            'NOCount',
            'NumHAcceptors',
            'NumHDonors',
            'MolLogP',
            'NumRotatableBonds',
            'RingCount',
            'NumAromaticRings',
            'TPSA',
        ]
        descriptor_calc = MoleculeDescriptors.MolecularDescriptorCalculator(
            descriptor_names
        )
        (
            mol_wt,
            no_count,
            h_acceptors,
            h_donors,
            log_p,
            rotatable_bonds,
            ring_count,
            aromatic_rings,
            tpsa,
        ) = descriptor_calc.CalcDescriptors(molecule)

        return (
            f"SAScore: {sa_score:.2f}; "
            f"molecular weight: {mol_wt:.2f}; "
            f"number of Nitrogens and Oxygens: {no_count}; "
            f"number of Hydrogen Bond Acceptors: {h_acceptors}; "
            f"number of Hydrogen Bond Donors:{h_donors}; "
            f"LogP:{log_p:.2f}; "
            f"number of Rotatable Bonds: {rotatable_bonds}; "
            f"Ring count: {ring_count}; "
            f"number of aromatic rings: {aromatic_rings}; "
            f"TPSA: {tpsa:.2f}."
        )

    async def _arun(self, smiles: str) -> str:
        """Asynchronous entry point; not supported by this tool."""
        raise NotImplementedError()
|
|
|
|
|
|
class FuncGroups(BaseTool):
    """Tool that lists the named functional groups present in a molecule.

    Each entry of ``dict_fgs`` maps a human-readable group name to a SMARTS
    pattern; a group is reported when its pattern matches the molecule.
    """

    name: str = "FunctionalGroups"
    description: str = "Input SMILES, return list of functional groups in the molecule."
    dict_fgs: dict = None

    def __init__(self):
        super().__init__()

        # Group name -> SMARTS pattern. Iteration order is the order in which
        # groups are listed in the tool's answer.
        self.dict_fgs = {
            "furan": "o1cccc1",
            "aldehydes": " [CX3H1](=O)[#6]",
            "esters": " [#6][CX3](=O)[OX2H0][#6]",
            # NOTE(review): the original literal declared "ketones" twice --
            # first as " [#6][CX3](=O)[#6]" here and again as "*=[O;D1]" near
            # the end. In a dict literal the last value wins while the key
            # keeps its first position, so the specific ketone pattern never
            # took effect. The effective mapping is preserved below; decide
            # whether the generic pattern should get its own name instead.
            "ketones": "*=[O;D1]",
            "amides": " C(=O)-N",
            "thiol groups": " [SH]",
            "alcohol groups": " [OH]",
            "methylamide": "*-[N;D2]-[C;D3](=O)-[C;D1;H3]",
            "carboxylic acids": "*-C(=O)[O;D1]",
            "carbonyl methylester": "*-C(=O)[O;D2]-[C;D1;H3]",
            "terminal aldehyde": "*-C(=O)-[C;D1]",
            "amide": "*-C(=O)-[N;D1]",
            "carbonyl methyl": "*-C(=O)-[C;D1;H3]",
            "isocyanate": "*-[N;D2]=[C;D2]=[O;D1]",
            "isothiocyanate": "*-[N;D2]=[C;D2]=[S;D1]",
            "nitro": "*-[N;D3](=[O;D1])[O;D1]",
            "nitroso": "*-[N;R0]=[O;D1]",
            "oximes": "*=[N;R0]-[O;D1]",
            "Imines": "*-[N;R0]=[C;D1;H2]",
            "terminal azo": "*-[N;D2]=[N;D2]-[C;D1;H3]",
            "hydrazines": "*-[N;D2]=[N;D1]",
            "diazo": "*-[N;D2]#[N;D1]",
            "cyano": "*-[C;D2]#[N;D1]",
            "primary sulfonamide": "*-[S;D4](=[O;D1])(=[O;D1])-[N;D1]",
            "methyl sulfonamide": "*-[N;D2]-[S;D4](=[O;D1])(=[O;D1])-[C;D1;H3]",
            "sulfonic acid": "*-[S;D4](=O)(=O)-[O;D1]",
            "methyl ester sulfonyl": "*-[S;D4](=O)(=O)-[O;D2]-[C;D1;H3]",
            "methyl sulfonyl": "*-[S;D4](=O)(=O)-[C;D1;H3]",
            "sulfonyl chloride": "*-[S;D4](=O)(=O)-[Cl]",
            "methyl sulfinyl": "*-[S;D3](=O)-[C;D1]",
            "methyl thio": "*-[S;D2]-[C;D1;H3]",
            "thiols": "*-[S;D1]",
            "thio carbonyls": "*=[S;D1]",
            "halogens": "*-[#9,#17,#35,#53]",
            "t-butyl": "*-[C;D4]([C;D1])([C;D1])-[C;D1]",
            "tri fluoromethyl": "*-[C;D4](F)(F)F",
            "acetylenes": "*-[C;D2]#[C;D1;H]",
            "cyclopropyl": "*-[C;D3]1-[C;D2]-[C;D2]1",
            "ethoxy": "*-[O;D2]-[C;D2]-[C;D1;H3]",
            "methoxy": "*-[O;D2]-[C;D1;H3]",
            "side-chain hydroxyls": "*-[O;D1]",
            "primary amines": "*-[N;D1]",
            "nitriles": "*#[N;D1]",
        }

    def _is_fg_in_mol(self, mol, fg):
        """Return True when the SMARTS pattern ``fg`` matches ``mol``.

        Args:
            mol: Either a SMILES string (parsed here after stripping
                surrounding whitespace) or an already parsed RDKit molecule.
                Accepting a parsed molecule lets ``_run`` parse the input
                once instead of once per pattern.
            fg: SMARTS pattern of the functional group.

        Raises:
            Whatever RDKit raises when the SMILES or the SMARTS cannot be
            parsed (the parser returns ``None`` and the match call rejects it).
        """
        fgmol = Chem.MolFromSmarts(fg)
        if isinstance(mol, str):
            mol = Chem.MolFromSmiles(mol.strip())
        return len(Chem.Mol.GetSubstructMatches(mol, fgmol, uniquify=True)) > 0

    def _run(self, smiles: str) -> str:
        """List the functional groups found in a molecule.

        Args:
            smiles: SMILES string of the molecule.

        Returns:
            A sentence naming every matching group from ``dict_fgs``; a
            dedicated message when the molecule is valid but matches none of
            them; or a "wrong argument" message when the input cannot be
            parsed into a non-empty molecule.
        """
        invalid_input = "Wrong argument. Please input a valid molecular SMILES."

        try:
            # Parse once up front; every pattern is matched against this Mol.
            molecule = Chem.MolFromSmiles(smiles.strip())
            if molecule is None or molecule.GetNumAtoms() == 0:
                return invalid_input

            fgs_in_molec = [
                name
                for name, fg in self.dict_fgs.items()
                if self._is_fg_in_mol(molecule, fg)
            ]
        except Exception:
            # Best-effort tool: any parsing/matching failure (including a
            # non-string argument) is reported to the caller as bad input
            # rather than raised.
            return invalid_input

        # A valid molecule may simply match nothing (previously this case
        # fell into the error branch via an IndexError).
        if not fgs_in_molec:
            return "No known functional groups were found in this molecule."

        if len(fgs_in_molec) == 1:
            return f"This molecule contains {fgs_in_molec[0]}."

        return f"This molecule contains {', '.join(fgs_in_molec[:-1])}, and {fgs_in_molec[-1]}."

    async def _arun(self, smiles: str) -> str:
        """Use the tool asynchronously."""
        raise NotImplementedError()
|
|
|
|