File size: 6,555 Bytes
ab4f034 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 |
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 11 10:27:20 2024
@author: BM109X32G-10GPU-02
"""
from langchain.tools import BaseTool
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import Descriptors
from .deepacceptor import RF
from .deepdonor import sm, pm
from .dap import run, screen
import pandas as pd
class acceptor_predictor(BaseTool):
    """LangChain tool that reports the DeepAcceptor prediction for one SMILES."""

    name: str = "acceptor_predictor"
    description: str = "Input acceptor SMILES , returns the score of the acceptor."

    def __init__(self):
        super().__init__()

    def _run(self, smiles: str) -> str:
        """Validate ``smiles`` with RDKit and return the predicted PCE as text.

        Args:
            smiles: SMILES string of the acceptor molecule.

        Returns:
            ``"Invalid SMILES string"`` when RDKit cannot parse the input;
            otherwise a sentence embedding the value returned by ``RF.main``.
        """
        parsed = Chem.MolFromSmiles(smiles)
        if parsed is None:
            return "Invalid SMILES string"

        # The model receives RDKit's re-serialised SMILES, not the raw input.
        rdkit_smiles = str(Chem.MolToSmiles(parsed))
        pce = RF.main(rdkit_smiles)
        return (
            "The power conversion efficiency (PCE) is predicted to be "
            f"{pce} (predicted by DeepAcceptor) "
        )

    async def _arun(self, smiles: str) -> str:
        """Async execution is not supported; always raises NotImplementedError."""
        raise NotImplementedError()
class donor_predictor(BaseTool):
    """LangChain tool that reports the DeepDonor predictions for one SMILES."""

    name: str = "donor_predictor"
    description: str = (
        "Input donor SMILES , returns the score of the donor."
    )

    def __init__(self):
        super().__init__()

    def _run(self, smiles: str) -> str:
        """Validate ``smiles`` with RDKit and return both donor predictions.

        Args:
            smiles: SMILES string of the donor molecule.

        Returns:
            ``"Invalid SMILES string"`` when RDKit cannot parse the input;
            otherwise a sentence embedding the values returned by ``sm.main``
            (small-molecule donor) and ``pm.main`` (polymer donor).
        """
        # Parse once: the molecule is only needed to validate the input, so
        # the second identical parse/check that used to follow was redundant.
        if Chem.MolFromSmiles(smiles) is None:
            return "Invalid SMILES string"

        # Both models receive the input string as given (not re-serialised).
        sdpce = sm.main(str(smiles))
        pdpce = pm.main(str(smiles))
        return (
            "The power conversion efficiency (PCE) of the given molecule is "
            f"predicted to be {sdpce} as a small molecule donor , "
            f"and {pdpce} as a polymer donor(predicted by DeepDonor) "
        )

    async def _arun(self, smiles: str) -> str:
        """Async execution is not supported; always raises NotImplementedError."""
        raise NotImplementedError()
class dap_predictor(BaseTool):
    """LangChain tool that scores a pair of SMILES joined by ``'.'``."""

    name: str = "dap_predictor"
    description: str = "Input SMILES of D/A pairs(separated by '.') , returns the performance of the D/A pairs ."

    def __init__(self):
        super().__init__()

    def _run(self, smiles_pair: str) -> str:
        """Split ``smiles_pair`` on ``'.'`` and forward both parts to the model.

        Args:
            smiles_pair: Two SMILES strings separated by a single ``'.'``.

        Returns:
            An input-error message when the split does not yield exactly two
            parts; otherwise whatever ``run.smiles_aas_test`` returns.
        """
        parts = smiles_pair.split(".")
        if len(parts) == 2:
            first, second = parts
            return run.smiles_aas_test(str(first), str(second))
        return "Input error, please input two smiles strings separated by '.'"

    async def _arun(self, smiles_pair: str) -> str:
        """Async execution is not supported; always raises NotImplementedError."""
        raise NotImplementedError()
class dap_screen(BaseTool):
    """LangChain tool that runs the D/A-pair screen over a dataset file."""

    name: str = "dap_screen"
    description: str = "Input dataset path containing D/A pairs, returns the files of prediction results."
    # The tool's output is flagged to be returned directly.
    return_direct: bool = True

    def __init__(self):
        super().__init__()

    def _run(self, file_path: str) -> str:
        """Pass ``file_path`` to ``screen.smiles_aas_test`` and return its result.

        Args:
            file_path: Path of the dataset to screen.

        Returns:
            The value produced by ``screen.smiles_aas_test``, unchanged.
        """
        return screen.smiles_aas_test(file_path)

    async def _arun(self, smiles_pair: str) -> str:
        """Async execution is not supported; always raises NotImplementedError."""
        raise NotImplementedError()
from .comget import generator
class molgen(BaseTool):
    """LangChain tool that generates a donor SMILES near a requested PCE."""

    name: str = "donorgen"
    description: str = (
        "Useful to generate polymer donor molecules with required PCE. "
        "Input the values of PCE , return the SMILES"
    )

    def __init__(self):
        super().__init__()

    def _run(self, value) -> str:
        """Generate candidates for ``value`` and return the first close match.

        Each SMILES in ``generator.generation(value)['smiles']`` is scored
        with ``pm.main``; the first one whose prediction differs from the
        requested value by less than 1.0 is reported.

        Args:
            value: Target PCE; must be convertible with ``float()``.

        Returns:
            A sentence with the matching SMILES and its predicted PCE, a
            message saying no candidate matched, or the text of any
            exception raised while generating or scoring.
        """
        try:
            # Convert once, before the generation step, so a non-numeric
            # request fails fast instead of after generation has run.
            target = float(value)
            results = generator.generation(value)
            for candidate in results['smiles']:
                predicted = pm.main(str(candidate))
                if abs(predicted - target) < 1.0:
                    return (
                        f"The SMILES of generated donor is {candidate}, "
                        f"its predicted PCE is {predicted}."
                    )
        except Exception as e:
            # Failures are reported back as text rather than raised.
            return str(e)
        # Previously this path fell through and returned None.
        return (
            "No generated donor had a predicted PCE within 1.0 of the "
            f"requested value {value}."
        )

    async def _arun(self, query) -> str:
        """Async execution is not supported; always raises NotImplementedError."""
        raise NotImplementedError("this tool does not support async")
from rdkit import Chem
from rdkit.Chem import BRICS
from typing import List
def connect_A_B_A(a_smiles: str, b_smiles: str) -> str:
    """Build products from fragments A and B with two rounds of BRICSBuild.

    The first round runs ``BRICS.BRICSBuild`` on ``[A, B]``; every resulting
    molecule is then fed back into ``BRICS.BRICSBuild`` together with A.
    All second-round products that sanitize are collected as SMILES, and the
    lexicographically smallest one is returned.

    Args:
        a_smiles: SMILES of fragment A.
        b_smiles: SMILES of fragment B.

    Returns:
        SMILES of the selected product.

    Raises:
        ValueError: If either input cannot be parsed by RDKit, or if no
            product could be assembled.
    """
    mol_A = Chem.MolFromSmiles(a_smiles)
    if mol_A is None:
        raise ValueError(f"Invalid SMILES for fragment A: {a_smiles!r}")
    mol_B = Chem.MolFromSmiles(b_smiles)
    if mol_B is None:
        raise ValueError(f"Invalid SMILES for fragment B: {b_smiles!r}")

    final_product_smiles = set()
    for intermediate_mol in BRICS.BRICSBuild([mol_A, mol_B]):
        try:
            Chem.SanitizeMol(intermediate_mol)
        except Exception as e:
            # Skip a bad intermediate the same way bad final products are
            # skipped below, instead of aborting the whole enumeration.
            print(f" {e}")
            continue
        for final_mol in BRICS.BRICSBuild([intermediate_mol, mol_A]):
            try:
                Chem.SanitizeMol(final_mol)
                final_product_smiles.add(Chem.MolToSmiles(final_mol))
            except Exception as e:
                print(f" {e}")

    if not final_product_smiles:
        raise ValueError(
            "No product could be built from the given fragments; check that "
            "both carry compatible attachment points."
        )

    # Only the smallest SMILES is ever returned, so parse just that one.
    # It is still round-tripped through RDKit so the output string stays
    # the same as before.
    chosen = Chem.MolFromSmiles(min(final_product_smiles))
    if chosen is None:
        raise ValueError("The generated product could not be re-parsed by RDKit.")
    return Chem.MolToSmiles(chosen)
class QMGen(BaseTool):
    """LangChain tool that joins a one-'*' and a two-'*' fragment into one SMILES."""

    name: str = "QMGen"
    description: str = (
        "Useful to generateQuasi-macromolecule . "
        "Input two molecule SMILES (separated by '.') , return the SMILES"
    )

    def __init__(self):
        super().__init__()

    def _run(self, smiles_pair: str) -> str:
        """Validate the fragment pair, relabel its '*' atoms and connect them.

        Args:
            smiles_pair: Two fragment SMILES separated by ``'.'``; one must
                contain exactly one ``'*'`` and the other exactly two.

        Returns:
            The SMILES returned by ``connect_A_B_A``, an input-error message
            when validation fails, or the text of any exception raised while
            connecting the fragments.
        """
        smi_list = smiles_pair.split(".")
        if len(smi_list) != 2:
            return "Input error, please input two smiles strings separated by '.'"

        if not any('*' in smi for smi in smi_list):
            return "Input error, please input smiles with '[*]' as the the key to be connected e.g.[14*]c1ccc([14*])s1."

        frag_1dummy_s = [smi for smi in smi_list if smi.count('*') == 1]
        frag_2dummy_s = [smi for smi in smi_list if smi.count('*') == 2]
        # Require exactly one fragment of each kind *before* indexing; the
        # old check compared a list's length with itself and never fired,
        # so malformed input crashed with IndexError on the lines below.
        if len(frag_1dummy_s) != 1 or len(frag_2dummy_s) != 1:
            return "Input error, please input acceptor (A) unit smiles with one '[*]' and Π bridge unit with two '[*]'."

        # Label the single-'*' fragment with isotope 16 and drop a 'C'
        # written directly before or after that label; label the two-'*'
        # fragment with isotope 14.
        # NOTE(review): 14/16 are presumably BRICS environment labels — confirm.
        smiles1 = (
            frag_1dummy_s[0]
            .replace('[*]', '[16*]')
            .replace('C[16*]', '[16*]')
            .replace('[16*]C', '[16*]')
        )
        smiles2 = frag_2dummy_s[0].replace('[*]', '[14*]')

        try:
            return connect_A_B_A(smiles1, smiles2)
        except Exception as e:
            # Failures are reported back as text rather than raised.
            return str(e)

    async def _arun(self, query) -> str:
        """Async execution is not supported; always raises NotImplementedError."""
        raise NotImplementedError("this tool does not support async")