TeLLAgent / tool /property.py
jinysun's picture
Upload 46 files
64e9ead verified
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 5 21:42:51 2024
@author: BM109X32G-10GPU-02
"""
from langchain.tools import BaseTool
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import Descriptors
from utils import *
from rdkit.Chem import RDConfig
from rdkit.ML.Descriptors import MoleculeDescriptors
from rdkit.Contrib.SA_Score import sascorer
class MolSimilarity(BaseTool):
name: str = "MolSimilarity"
description: str = (
"Input two molecule SMILES (separated by '.'), returns Tanimoto similarity."
)
def __init__(self):
super().__init__()
def _run(self, smiles_pair: str) -> str:
smi_list = smiles_pair.split(".")
if len(smi_list) != 2:
return "Input error, please input two smiles strings separated by '.'"
else:
smiles1, smiles2 = smi_list
similarity = tanimoto(smiles1, smiles2)
if isinstance(similarity, str):
return similarity
if similarity == 1:
return "Error: Input Molecules Are Identical"
else:
message = f"The Tanimoto similarity between {smiles1} and {smiles2} is {round(similarity, 4)}"
return message
async def _arun(self, smiles_pair: str) -> str:
"""Use the tool asynchronously."""
raise NotImplementedError()
class SMILES2Weight(BaseTool):
name: str = "SMILES2Weight"
description: str = "Input SMILES, returns molecular weight."
def __init__(
self,
):
super().__init__()
def _run(self, smiles: str) -> str:
mol = Chem.MolFromSmiles(smiles)
if mol is None:
return "Invalid SMILES string"
mol_weight = rdMolDescriptors.CalcExactMolWt(mol)
return mol_weight
async def _arun(self, smiles: str) -> str:
"""Use the tool asynchronously."""
raise NotImplementedError()
class SMILES2LogP(BaseTool):
name: str = "SMILES2LogP"
description: str = "Input SMILES, returns molecular LogP."
def __init__(
self,
):
super().__init__()
def _run(self, smiles: str) -> str:
mol = Chem.MolFromSmiles(smiles)
if mol is None:
return "Invalid SMILES string"
MolLogP = Descriptors.MolLogP(mol)
return MolLogP
async def _arun(self, smiles: str) -> str:
"""Use the tool asynchronously."""
raise NotImplementedError()
class SMILES2SAScore(BaseTool):
name: str = "SMILES2SAScore"
description: str = "Input SMILES, returns synthetic accessibility score to evaluate the difficulty of molecular synthesis."
def __init__(
self,
):
super().__init__()
def _run(self, smiles: str) -> str:
mol = Chem.MolFromSmiles(smiles)
if mol is None:
return "Invalid SMILES string"
SAScore = sascorer.calculateScore(mol)
return f"This SAScore of the molecule is {SAScore}."
async def _arun(self, smiles: str) -> str:
"""Use the tool asynchronously."""
raise NotImplementedError()
class SMILES2Properties(BaseTool):
name: str = "SMILES2Properties"
description: str = "Input SMILES, returns basic physical and chemical properties."
def __init__(
self,
):
super().__init__()
def _run(self, smiles: str) -> str:
mol = Chem.MolFromSmiles(smiles)
if mol is None:
return "Invalid SMILES string"
SAScore = sascorer.calculateScore(mol)
des_list = ['MolWt','NOCount', 'NumHAcceptors', 'NumHDonors', 'MolLogP', 'NumRotatableBonds','RingCount','NumAromaticRings','TPSA']
calculator = MoleculeDescriptors.MolecularDescriptorCalculator(des_list)
results = calculator.CalcDescriptors(mol)
return f"SAScore: {'{:.2f}'.format(SAScore)}; molecular weight: {'{:.2f}'.format(results[0])}; number of Nitrogens and Oxygens: {results[1]}; number of Hydrogen Bond Acceptors: {results[2]}; number of Hydrogen Bond Donors:{results[3]}; LogP:{'{:.2f}'.format(results[4])}; number of Rotatable Bonds: {results[5]}; Ring count: {results[6]}; number of aromatic rings: {results[7]}; TPSA: {'{:.2f}'.format(results[8])}."
async def _arun(self, smiles: str) -> str:
"""Use the tool asynchronously."""
raise NotImplementedError()
class FuncGroups(BaseTool):
name: str = "FunctionalGroups"
description: str = "Input SMILES, return list of functional groups in the molecule."
dict_fgs: dict = None
def __init__(
self,
):
super().__init__()
# List obtained from https://github.com/rdkit/rdkit/blob/master/Data/FunctionalGroups.txt
self.dict_fgs = {
"furan": "o1cccc1",
"aldehydes": " [CX3H1](=O)[#6]",
"esters": " [#6][CX3](=O)[OX2H0][#6]",
"ketones": " [#6][CX3](=O)[#6]",
"amides": " C(=O)-N",
"thiol groups": " [SH]",
"alcohol groups": " [OH]",
"methylamide": "*-[N;D2]-[C;D3](=O)-[C;D1;H3]",
"carboxylic acids": "*-C(=O)[O;D1]",
"carbonyl methylester": "*-C(=O)[O;D2]-[C;D1;H3]",
"terminal aldehyde": "*-C(=O)-[C;D1]",
"amide": "*-C(=O)-[N;D1]",
"carbonyl methyl": "*-C(=O)-[C;D1;H3]",
"isocyanate": "*-[N;D2]=[C;D2]=[O;D1]",
"isothiocyanate": "*-[N;D2]=[C;D2]=[S;D1]",
"nitro": "*-[N;D3](=[O;D1])[O;D1]",
"nitroso": "*-[N;R0]=[O;D1]",
"oximes": "*=[N;R0]-[O;D1]",
"Imines": "*-[N;R0]=[C;D1;H2]",
"terminal azo": "*-[N;D2]=[N;D2]-[C;D1;H3]",
"hydrazines": "*-[N;D2]=[N;D1]",
"diazo": "*-[N;D2]#[N;D1]",
"cyano": "*-[C;D2]#[N;D1]",
"primary sulfonamide": "*-[S;D4](=[O;D1])(=[O;D1])-[N;D1]",
"methyl sulfonamide": "*-[N;D2]-[S;D4](=[O;D1])(=[O;D1])-[C;D1;H3]",
"sulfonic acid": "*-[S;D4](=O)(=O)-[O;D1]",
"methyl ester sulfonyl": "*-[S;D4](=O)(=O)-[O;D2]-[C;D1;H3]",
"methyl sulfonyl": "*-[S;D4](=O)(=O)-[C;D1;H3]",
"sulfonyl chloride": "*-[S;D4](=O)(=O)-[Cl]",
"methyl sulfinyl": "*-[S;D3](=O)-[C;D1]",
"methyl thio": "*-[S;D2]-[C;D1;H3]",
"thiols": "*-[S;D1]",
"thio carbonyls": "*=[S;D1]",
"halogens": "*-[#9,#17,#35,#53]",
"t-butyl": "*-[C;D4]([C;D1])([C;D1])-[C;D1]",
"tri fluoromethyl": "*-[C;D4](F)(F)F",
"acetylenes": "*-[C;D2]#[C;D1;H]",
"cyclopropyl": "*-[C;D3]1-[C;D2]-[C;D2]1",
"ethoxy": "*-[O;D2]-[C;D2]-[C;D1;H3]",
"methoxy": "*-[O;D2]-[C;D1;H3]",
"side-chain hydroxyls": "*-[O;D1]",
"ketones": "*=[O;D1]",
"primary amines": "*-[N;D1]",
"nitriles": "*#[N;D1]",
}
def _is_fg_in_mol(self, mol, fg):
fgmol = Chem.MolFromSmarts(fg)
mol = Chem.MolFromSmiles(mol.strip())
return len(Chem.Mol.GetSubstructMatches(mol, fgmol, uniquify=True)) > 0
def _run(self, smiles: str) -> str:
"""
Input a molecule SMILES or name.
Returns a list of functional groups identified by their common name (in natural language).
"""
try:
fgs_in_molec = [
name
for name, fg in self.dict_fgs.items()
if self._is_fg_in_mol(smiles, fg)
]
if len(fgs_in_molec) > 1:
return f"This molecule contains {', '.join(fgs_in_molec[:-1])}, and {fgs_in_molec[-1]}."
else:
return f"This molecule contains {fgs_in_molec[0]}."
except:
return "Wrong argument. Please input a valid molecular SMILES."
async def _arun(self, smiles: str) -> str:
"""Use the tool asynchronously."""
raise NotImplementedError()