cafierom committed on
Commit
7c995c8
·
verified ·
1 Parent(s): b715f7c

Create chem_nodes.py

Browse files
Files changed (1) hide show
  1. chem_nodes.py +160 -0
chem_nodes.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from typing import Annotated, TypedDict, Literal
3
+ from langchain_community.tools import DuckDuckGoSearchRun
4
+ from langchain_core.tools import tool
5
+ from langgraph.prebuilt import ToolNode, tools_condition
6
+ from langgraph.graph import StateGraph, START, END
7
+ from langgraph.graph.message import add_messages
8
+ from langchain_core.messages import SystemMessage, trim_messages, AIMessage, HumanMessage, ToolCall
9
+
10
+ import re
11
+ import matplotlib.pyplot as plt
12
+
13
+ from rdkit import Chem
14
+ from rdkit.Chem import AllChem, QED
15
+ from rdkit.Chem import Draw
16
+ from rdkit.Chem.Draw import MolsToGridImage
17
+ from rdkit import rdBase
18
+ from rdkit.Chem import rdMolAlign
19
+ import os, re
20
+ from rdkit import RDConfig
21
+ import pubchempy as pcp
22
+ import gradio as gr
23
+ from PIL import Image
24
+
25
class State(TypedDict):
    '''
    The state of the agent.

    A single mapping of this shape is handed to every node; each node reads the
    fields it needs, writes its results back and returns the mapping.
    '''
    # Message history; add_messages is attached as the reducer for this field.
    messages: Annotated[list, add_messages]
    # SMILES string of the molecule. Read by name_node and similars_node,
    # written by smiles_node ("unknown" when the lookup fails).
    query_smiles: str
    # Not used by the nodes in this file - presumably the user's task text;
    # TODO confirm against the code that builds the graph.
    query_task: str
    # Name of the molecule. Read by smiles_node and similars_node, written by
    # name_node ("unknown" when the lookup fails).
    query_name: str
    # Not used by the nodes in this file - TODO confirm its meaning with the caller.
    query_reference: str
    # Not used by the nodes in this file - presumably the tools selected for the
    # current task; TODO confirm.
    tool_choice: tuple
    # Counter that every node increments by one after it has run.
    which_tool: int
    # Accumulated text of all tool results; every node appends to it.
    props_string: str
    # Base name (no ".png" extension) of the image written by similars_node.
    # NOTE: similars_node stores None here when no image was produced.
    similars_img: str
    # Not used by the nodes in this file - TODO confirm its meaning with the caller.
    loop_again: str
39
+
40
def name_node(state: State) -> State:
    '''
    Queries Pubchem for the name of the molecule based on the smiles string.

    Args:
        state: the agent state; query_smiles is the input smiles string
    Returns:
        state: the same state object, updated in place:
            query_name: the IUPAC name of the molecule, or "unknown" if the
                        lookup failed
            props_string: the previous tool results with the name results appended
            which_tool: incremented by one
    '''
    print("name tool")
    print('===================================================')
    # A state that has not collected any results yet may lack this key (or hold
    # None); treat both as an empty string instead of failing before the lookup.
    current_props_string = state.get("props_string") or ""

    try:
        smiles = state["query_smiles"]
        # Indexing [0] fails on an empty result list, which lands in the handler
        # below together with lookup errors and a missing query_smiles key.
        compound = pcp.get_compounds(smiles, "smiles")[0]
        name = compound.iupac_name
        name_string = f'IUPAC molecule name: {name}\n'
    except Exception as err:
        # Best effort: report the reason and carry on with the "unknown" marker
        # so the rest of the graph can still run.
        print(f'Name lookup failed: {err!r}')
        name = "unknown"
        name_string = 'Could not find name for molecule'
    else:
        # Synonyms are optional extras: a failure here must not throw away the
        # IUPAC name that was already found.
        try:
            syn_list = pcp.get_synonyms(compound.cid)
            name_string += ''.join(
                f'alternative or common name: {alt_name}\n'
                for alt_name in syn_list[0]['Synonym'][:5]
            )
        except Exception as err:
            print(f'Synonym lookup failed: {err!r}')

    state["query_name"] = name
    state["props_string"] = current_props_string + name_string
    state["which_tool"] = (state.get("which_tool") or 0) + 1
    return state
73
+
74
def smiles_node(state: State) -> State:
    '''
    Queries Pubchem for the smiles string of the molecule based on the name.

    Args:
        state: the agent state; query_name is the molecule name
    Returns:
        state: the same state object, updated in place:
            query_smiles: the smiles string of the molecule, or "unknown" if
                          the lookup failed
            props_string: the previous tool results with the smiles result appended
            which_tool: incremented by one
    '''
    print("smiles tool")
    print('===================================================')
    # A state that has not collected any results yet may lack this key (or hold
    # None); treat both as an empty string instead of failing before the lookup.
    current_props_string = state.get("props_string") or ""

    try:
        name = state["query_name"]
        # Indexing [0] fails on an empty result list, which lands in the handler
        # below together with lookup errors and a missing query_name key.
        best_match = pcp.get_compounds(name, "name")[0]
        smiles = best_match.smiles
        smiles_string = f'molecule SMILES: {smiles}\n'
    except Exception as err:
        # Best effort: report the reason and carry on with the "unknown" marker
        # so the rest of the graph can still run.
        print(f'SMILES lookup failed: {err!r}')
        smiles = "unknown"
        smiles_string = 'Could not find smiles for molecule'

    state["query_smiles"] = smiles
    state["props_string"] = current_props_string + smiles_string
    state["which_tool"] = (state.get("which_tool") or 0) + 1
    return state
102
+
103
def _similars_usable(value) -> bool:
    '''True if a state field holds a real identifier (not None, blank or "unknown").'''
    # "unknown" is the marker name_node / smiles_node store after a failed lookup.
    return isinstance(value, str) and value.strip() not in ("", "unknown")


def _similars_search(state):
    '''Runs the Pubchem similarity search; returns None if the state has no usable identifier.'''
    smiles = state.get("query_smiles")
    if _similars_usable(smiles):
        return pcp.get_compounds(smiles, "smiles", searchtype="similarity", listkey_count=20)

    name = state.get("query_name")
    if _similars_usable(name):
        # PUG REST structure searches take a structure or a CID as input, not a
        # name, so the name is first resolved to a compound and its CID is used.
        matches = pcp.get_compounds(name, "name")
        if not matches:
            return []
        return pcp.get_compounds(matches[0].cid, "cid", searchtype="similarity", listkey_count=20)

    return None


def _similars_describe(compounds) -> str:
    '''Builds the text block listing name, SMILES, molecular weight and LogP of each compound.'''
    parts = ['Found Similar compounds: \n']
    for compound in compounds:
        parts.append(f'Name: {compound.iupac_name}\n')
        parts.append(f'SMILES: {compound.smiles}\n')
        parts.append(f'Molecular Weight: {compound.molecular_weight}\n')
        parts.append(f'LogP: {compound.xlogp}\n')
        # Newline after the separator so consecutive entries do not run together.
        parts.append('===================\n')
    return ''.join(parts)


def _similars_draw(compounds, filename) -> None:
    '''Draws the compounds as a grid and writes it to <filename>.png; raises if nothing can be drawn.'''
    mols, legends = [], []
    for compound in compounds:
        mol = Chem.MolFromSmiles(compound.smiles) if compound.smiles else None
        # MolFromSmiles returns None for SMILES it cannot parse; skip those and
        # keep the legends aligned with the molecules that are actually drawn.
        if mol is not None:
            mols.append(mol)
            legends.append(str(compound.smiles))
    if not mols:
        raise ValueError('no drawable molecules')

    img = Draw.MolsToGridImage(mols, legends=legends, molsPerRow=4, subImgSize=(250, 250))
    path = filename + ".png"
    if hasattr(img, "save"):
        # PIL image (what RDKit returns outside a notebook): it has no .data.
        img.save(path)
    else:
        # Raw PNG bytes, or a notebook image object carrying them in .data.
        png = img if isinstance(img, (bytes, bytearray)) else img.data
        with open(path, 'wb') as outf:
            outf.write(png)


def similars_node(state: State) -> State:
    '''
    Queries Pubchem for similar molecules based on the smiles string or name

    The smiles string is preferred; the name is used only when no usable smiles
    string is present. Values of None, an empty string or "unknown" count as
    not present.

    Args:
        state: the agent state; query_smiles is the input smiles string, OR
               query_name is the molecule name
    Returns:
        state: the same state object, updated in place:
            props_string: the previous tool results with the similar-compound
                          list (or a failure message) appended
            similars_img: "Similars_image" (base name of the PNG written to the
                          working directory) or None if no image was written
            which_tool: incremented by one, on every path
    '''
    print("similars tool")
    print('===================================================')
    # A state that has not collected any results yet may lack this key (or hold
    # None); treat both as an empty string instead of failing before the search.
    current_props_string = state.get("props_string") or ""

    props_string = 'Could not find similar molecules'
    filename = None
    res = None

    try:
        res = _similars_search(state)
        if res is None:
            print('Not enough information to run similars tool')
        elif res:
            print(res[0].iupac_name)
            props_string = _similars_describe(res)
    except Exception as err:
        # Best effort: report the reason and fall back to the failure message.
        print(f'Similarity search failed: {err!r}')
        props_string = 'Could not find similar molecules'
        res = None

    if res:
        # The picture is an extra: if drawing fails, keep the text results and
        # just report that there is no image.
        try:
            _similars_draw(res, "Similars_image")
            filename = "Similars_image"
        except Exception as err:
            print(f'Could not draw similar molecules: {err!r}')

    state["props_string"] = current_props_string + props_string
    state['similars_img'] = filename
    # Incremented on every path (including "not enough information") so the
    # counter advances exactly once per run of this node.
    state["which_tool"] = (state.get("which_tool") or 0) + 1
    return state