File size: 2,543 Bytes
d3bbf03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17ecd02
 
 
d3bbf03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64e9ead
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 30 09:14:55 2024

@author: BM109X32G-10GPU-02
"""


 
import numpy as np
from rdkit.Chem import AllChem
from sklearn.datasets import make_blobs
import json
import numpy as np
import math
 
from scipy import sparse
from sklearn.metrics import median_absolute_error,r2_score, mean_absolute_error,mean_squared_error
from langchain.tools import BaseTool
import pandas as pd
 
from rdkit import Chem
import pickle
from sklearn.ensemble import RandomForestRegressor
 

def split_string(string):
    """Return the items of *string* as a list, one element per character.

    An empty input yields an empty list. The input is only iterated, so
    any iterable is accepted and its items are returned in order.
    """
    return [char for char in string]
 
def main(sm):
    """Predict the HOMO and LUMO values for a single SMILES string.

    The molecule is parsed with RDKit, converted to a Morgan fingerprint
    bit vector, and passed to the two pickled regressors stored under
    ``tool/orbital/`` (paths are relative to the current working directory).

    Parameters
    ----------
    sm : str
        SMILES representation of the molecule.

    Returns
    -------
    tuple[float, float]
        ``(homo, lumo)`` as plain Python floats.

    Raises
    ------
    ValueError
        If RDKit cannot parse ``sm``. Previously this case was silently
        skipped and surfaced later as a shape error (also a ``ValueError``)
        raised from inside ``predict``.
    OSError
        If one of the model files cannot be opened.
    """
    mol = Chem.MolFromSmiles(sm)
    if mol is None:
        raise ValueError(f"Invalid SMILES string: {sm!r}")

    # NOTE(review): the second positional argument of
    # GetMorganFingerprintAsBitVect is ``radius``, not ``nBits``, so this
    # requests radius=1024 with the default bit-vector length. Presumably
    # ``nBits=1024`` was intended, but the call is left untouched because the
    # featurisation must match whatever homo.dat / lumo.dat were trained
    # with -- confirm against the training script before changing it.
    fingerprint = AllChem.GetMorganFingerprintAsBitVect(mol, 1024)
    bits = fingerprint.ToBitString()

    # Build an explicit numeric (1, n_bits) matrix instead of an array of
    # '0'/'1' strings, so nothing depends on implicit string-to-number
    # casting inside the estimator.
    X_test = np.array([[int(bit) for bit in bits]], dtype=float)

    # NOTE(security): pickle executes arbitrary code while loading; these
    # files must only ever come from a trusted source.
    # The context managers close the file handles deterministically.
    with open(r"tool/orbital/homo.dat", 'rb') as homo_file:
        load_homo = pickle.load(homo_file)
    with open(r"tool/orbital/lumo.dat", 'rb') as lumo_file:
        load_lumo = pickle.load(lumo_file)

    Y_homo = load_homo.predict(X_test)
    Y_lumo = load_lumo.predict(X_test)

    # ``.item()`` extracts the single prediction explicitly; calling
    # ``float()`` on a 1-D array is deprecated since NumPy 1.25.
    homo = float(np.asarray(Y_homo).item())
    lumo = float(np.asarray(Y_lumo).item())
    return homo, lumo

class homolumo_predictor(BaseTool):
    # LangChain tool that takes a SMILES string and reports the HOMO and
    # LUMO values predicted by the module-level ``main`` function.
    # ``name`` and ``description`` are runtime strings exposed through the
    # tool object, so their text is kept exactly as it was.
    name: str = "homolumo_predictor"
    description: str = (
        "Input SMILES , returns the HOMO/LUMO  (Highest Occupied Molecular Orbital (HOMO) \
        and  Lowest Unoccupied Molecular Orbital)."
    )

    def __init__(self):
        # No tool-specific state; defer entirely to the base initialiser.
        super().__init__()

    def _run(self, smiles: str) -> str:
        # Validate the input up front so an unparsable SMILES produces a
        # readable message instead of an exception from the predictor.
        if Chem.MolFromSmiles(smiles) is None:
            return "Invalid SMILES string"

        homo, lumo = main(str(smiles))
        # Both values are rendered with two decimals; the spacing around
        # "eV" is reproduced exactly.
        return (
            f"The HOMO is predicted to be {homo:.2f} eV , "
            f"the LUMO is predicted to be {lumo:.2f}  eV"
        )

    async def _arun(self, smiles: str) -> str:
        """Use the tool asynchronously."""
        # Only the synchronous path is implemented.
        raise NotImplementedError()