TeLLAgent / tool /PCE.py
jinysun's picture
Update tool/PCE.py
ab4f034 verified
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 11 10:27:20 2024
@author: BM109X32G-10GPU-02
"""
from langchain.tools import BaseTool
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import Descriptors
from .deepacceptor import RF
from .deepdonor import sm, pm
from .dap import run, screen
import pandas as pd
class acceptor_predictor(BaseTool):
    """LangChain tool that reports a predicted PCE for an acceptor SMILES.

    The input is parsed with RDKit, re-serialised with ``Chem.MolToSmiles``
    and handed to ``RF.main`` (imported from ``.deepacceptor``).
    """

    name: str = "acceptor_predictor"
    description: str = "Input acceptor SMILES , returns the score of the acceptor."

    def __init__(self):
        super().__init__()

    def _run(self, smiles: str) -> str:
        """Return a sentence with the predicted PCE, or an error message.

        Args:
            smiles: SMILES string of the acceptor molecule.

        Returns:
            ``"Invalid SMILES string"`` when RDKit cannot parse the input,
            otherwise a sentence embedding the value returned by ``RF.main``.
        """
        parsed = Chem.MolFromSmiles(smiles)
        if parsed is not None:
            # The model is fed RDKit's own serialisation, not the raw input.
            rdkit_smiles = str(Chem.MolToSmiles(parsed))
            predicted = RF.main(rdkit_smiles)
            return f'The power conversion efficiency (PCE) is predicted to be {predicted} (predicted by DeepAcceptor) '
        return "Invalid SMILES string"

    async def _arun(self, smiles: str) -> str:
        """Async entry point; not supported by this tool."""
        raise NotImplementedError()
class donor_predictor(BaseTool):
    """LangChain tool that reports predicted PCE values for a donor SMILES.

    The molecule is scored twice: by ``sm.main`` (reported as the small
    molecule donor value) and by ``pm.main`` (reported as the polymer donor
    value). Both are imported from ``.deepdonor``.
    """

    name: str = "donor_predictor"
    description: str = (
        "Input donor SMILES , returns the score of the donor."
    )

    def __init__(self):
        super().__init__()

    def _run(self, smiles: str) -> str:
        """Return a sentence with both predicted PCE values, or an error message.

        Args:
            smiles: SMILES string of the donor molecule.

        Returns:
            ``"Invalid SMILES string"`` when RDKit cannot parse the input,
            otherwise a sentence embedding the two model outputs.
        """
        # Parse once purely as a validity check; the models receive the
        # SMILES exactly as it was supplied by the caller.
        if Chem.MolFromSmiles(smiles) is None:
            return "Invalid SMILES string"
        sdpce = sm.main(str(smiles))
        pdpce = pm.main(str(smiles))
        return f'The power conversion efficiency (PCE) of the given molecule is predicted to be {sdpce} as a small molecule donor , and {pdpce} as a polymer donor(predicted by DeepDonor) '

    async def _arun(self, smiles: str) -> str:
        """Use the tool asynchronously (not supported)."""
        raise NotImplementedError()
class dap_predictor(BaseTool):
    """LangChain tool that scores a donor/acceptor pair given as ``"S1.S2"``.

    The two halves of the input are forwarded, in order, to
    ``run.smiles_aas_test`` (imported from ``.dap``).
    """

    name: str = "dap_predictor"
    description: str = "Input SMILES of D/A pairs(separated by '.') , returns the performance of the D/A pairs ."

    def __init__(self):
        super().__init__()

    def _run(self, smiles_pair: str) -> str:
        """Split the pair on ``'.'`` and return the model's result.

        Args:
            smiles_pair: Two SMILES strings joined by a single ``'.'``.

        Returns:
            An input-error message when the split does not yield exactly two
            parts, otherwise whatever ``run.smiles_aas_test`` returns.
        """
        parts = smiles_pair.split(".")
        # Guard clause: anything but exactly two fragments is rejected.
        if len(parts) != 2:
            return "Input error, please input two smiles strings separated by '.'"
        first, second = parts
        return run.smiles_aas_test(str(first), str(second))

    async def _arun(self, smiles_pair: str) -> str:
        """Async entry point; not supported by this tool."""
        raise NotImplementedError()
class dap_screen(BaseTool):
    """LangChain tool that runs D/A-pair screening over a dataset file.

    The path is passed straight to ``screen.smiles_aas_test`` (imported from
    ``.dap``). ``return_direct`` is set, so the tool output is handed back as
    the final answer.
    """

    name: str = "dap_screen"
    description: str = "Input dataset path containing D/A pairs, returns the files of prediction results."
    return_direct: bool = True

    def __init__(self):
        super().__init__()

    def _run(self, file_path: str) -> str:
        """Screen the dataset at ``file_path`` and return the screening result.

        Args:
            file_path: Path of the dataset containing D/A pairs.

        Returns:
            Whatever ``screen.smiles_aas_test`` returns for that path.
        """
        return screen.smiles_aas_test(file_path)

    async def _arun(self, smiles_pair: str) -> str:
        """Async entry point; not supported by this tool."""
        raise NotImplementedError()
from .comget import generator
class molgen(BaseTool):
    """LangChain tool that proposes a polymer donor SMILES for a target PCE.

    Candidates come from ``generator.generation`` and are re-scored with
    ``pm.main``; the first candidate whose predicted PCE lies within 1.0 of
    the requested value is reported.
    """

    name: str = "donorgen"
    description: str = (
        "Useful to generate polymer donor molecules with required PCE. "
        "Input the values of PCE , return the SMILES"
    )

    def __init__(self):
        super().__init__()

    def _run(self, value) -> str:
        """Generate candidates and return the first one close to the target.

        Args:
            value: Requested PCE; must be convertible with ``float``.

        Returns:
            A sentence naming the accepted SMILES and its predicted PCE, a
            message saying that no candidate matched, or the text of any
            exception raised while generating or scoring.
        """
        try:
            # Convert once up front: a non-numeric request fails before the
            # generator is run, and the loop no longer re-parses the value.
            target = float(value)
            results = generator.generation(value)
            for candidate in results['smiles']:
                pdpce = pm.main(str(candidate))
                if abs(pdpce - target) < 1.0:
                    return f"The SMILES of generated donor is {candidate}, its predicted PCE is {pdpce}."
        except Exception as e:
            # Tool boundary: report the failure as text instead of raising.
            return str(e)
        # Previously this path fell through and returned None, violating the
        # declared ``-> str`` contract.
        return f"No generated donor has a predicted PCE within 1.0 of the requested value {value}."

    async def _arun(self, query) -> str:
        """Use the tool asynchronously (not supported)."""
        raise NotImplementedError("this tool does not support async")
from rdkit import Chem
from rdkit.Chem import BRICS
from typing import List
def connect_A_B_A(a_smiles: str, b_smiles: str) -> str:
    """Build an A-B-A style product from two BRICS-labelled fragments.

    ``BRICS.BRICSBuild`` is first run on ``[A, B]``; every resulting
    intermediate is then combined with ``A`` again. All products that
    sanitize are collected as SMILES, and the lexicographically smallest one
    is returned after a parse / re-serialise round trip through RDKit.

    Args:
        a_smiles: SMILES of the A fragment (used in both build steps).
        b_smiles: SMILES of the B fragment.

    Returns:
        The SMILES string of the selected product.

    Raises:
        ValueError: If either input cannot be parsed, or if no product could
            be built (previously an opaque ``IndexError``).
    """
    mol_A = Chem.MolFromSmiles(a_smiles)
    if mol_A is None:
        raise ValueError(f"Invalid SMILES string: {a_smiles!r}")
    mol_B = Chem.MolFromSmiles(b_smiles)
    if mol_B is None:
        raise ValueError(f"Invalid SMILES string: {b_smiles!r}")

    final_product_smiles = set()
    for intermediate_mol in BRICS.BRICSBuild([mol_A, mol_B]):
        try:
            Chem.SanitizeMol(intermediate_mol)
        except Exception as e:
            # One bad intermediate must not abort the whole enumeration;
            # handle it the same way as a bad final product below.
            print(f" {e}")
            continue
        for final_mol in BRICS.BRICSBuild([intermediate_mol, mol_A]):
            try:
                Chem.SanitizeMol(final_mol)
                final_product_smiles.add(Chem.MolToSmiles(final_mol))
            except Exception as e:
                print(f" {e}")

    if not final_product_smiles:
        raise ValueError(
            "No A-B-A product could be built from the given fragments"
        )

    # min() picks the same element as sorted(...)[0] without re-parsing
    # every product; only the chosen one goes through the round trip.
    chosen = min(final_product_smiles)
    return Chem.MolToSmiles(Chem.MolFromSmiles(chosen))
class QMGen(BaseTool):
    """LangChain tool that joins two ``'*'``-labelled fragments via ``connect_A_B_A``.

    The input must hold one fragment with a single ``'*'`` and one fragment
    with two ``'*'``; the former is passed as the A unit and the latter as
    the B unit.
    """

    name: str = "QMGen"
    description: str = (
        "Useful to generateQuasi-macromolecule . "
        "Input two molecule SMILES (separated by '.') , return the SMILES"
    )

    def __init__(self):
        super().__init__()

    def _run(self, smiles_pair: str) -> str:
        """Validate the fragment pair, relabel the dummies and build the product.

        Args:
            smiles_pair: Two fragment SMILES joined by a single ``'.'``.

        Returns:
            The SMILES returned by ``connect_A_B_A``, an input-error message
            when the pair is malformed, or the text of any exception raised
            while connecting the fragments.
        """
        smi_list = smiles_pair.split(".")
        if len(smi_list) != 2:
            return "Input error, please input two smiles strings separated by '.'"

        # No attachment marker at all: ask for '[*]'-labelled input.
        if not any('*' in smiles for smiles in smi_list):
            return "Input error, please input smiles with '[*]' as the the key to be connected e.g.[14*]c1ccc([14*])s1."

        frag_1dummy_s = [smiles for smiles in smi_list if smiles.count('*') == 1]
        frag_2dummy_s = [smiles for smiles in smi_list if smiles.count('*') == 2]
        # Validate BEFORE indexing. The original compared a list's length
        # with itself (always False) and then indexed [0] unguarded, so bad
        # input escaped as an uncaught IndexError.
        if len(frag_1dummy_s) != 1 or len(frag_2dummy_s) != 1:
            return "Input error, please input acceptor (A) unit smiles with one '[*]' and Π bridge unit with two '[*]'."

        # Relabel '[*]' as '[16*]' on the one-dummy fragment and drop a 'C'
        # written directly next to that label; the two-dummy fragment gets
        # '[14*]'. NOTE(review): presumably these are the BRICS labels that
        # BRICSBuild pairs up - confirm against the RDKit BRICS rules.
        smiles1 = (
            frag_1dummy_s[0]
            .replace('[*]', '[16*]')
            .replace('C[16*]', '[16*]')
            .replace('[16*]C', '[16*]')
        )
        smiles2 = frag_2dummy_s[0].replace('[*]', '[14*]')

        try:
            return connect_A_B_A(smiles1, smiles2)
        except Exception as e:
            # Tool boundary: report the failure as text instead of raising.
            return str(e)

    async def _arun(self, query) -> str:
        """Use the tool asynchronously (not supported)."""
        raise NotImplementedError("this tool does not support async")