jinysun committed · Commit dbaa85f · verified · 1 Parent(s): 9021576

Upload 9 files

Files changed (9)
  1. agent.py +126 -0
  2. app.py +219 -0
  3. prompts.py +110 -0
  4. requirement.txt +22 -0
  5. search.py +32 -0
  6. streamlit_callback_handler.py +204 -0
  7. test.py +262 -0
  8. tools.py +70 -0
  9. utils.py +159 -0
agent.py ADDED
@@ -0,0 +1,126 @@

from typing import Optional

import langchain
from dotenv import load_dotenv
from langchain_core.prompts import PromptTemplate
from langchain import chains
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from pydantic import ValidationError
from langchain.agents import AgentExecutor
from langchain.agents.mrkl.base import ZeroShotAgent
from prompts import FORMAT_INSTRUCTIONS, QUESTION_PROMPT, QUESTION_PROMPT1, SUFFIX
from tools import make_tools

from rmrkl import ChatZeroShotAgent, RetryAgentExecutor

import base64
from io import BytesIO
from PIL import Image
from IPython.display import HTML, display
from langchain_openai import ChatOpenAI, OpenAI


def convert_to_base64(pil_image):
    """Encode a PIL image as a base64 PNG string."""
    buffered = BytesIO()
    pil_image.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return img_str


def _make_llm(model, temp, api_key, streaming: bool = False):
    """Build the LLM client appropriate for the given model name."""
    if model.startswith("claude") or model.startswith("gpt-3"):
        llm = OpenAI(
            temperature=temp,
            model_name=model,
            max_tokens=5000,
            openai_api_key=api_key,
            base_url="https://www.dmxapi.com/v1",
        )
    elif model.startswith("gpt-4o") or model.startswith("deepseek"):
        llm = ChatOpenAI(
            model=model,
            temperature=0.1,
            timeout=1000,
            callbacks=[StreamingStdOutCallbackHandler()],
            openai_api_key=api_key,
            base_url="https://www.dmxapi.com/v1",
        )
    else:
        raise ValueError(f"Invalid model name: {model}")
    return llm


class TeLLAgent:
    def __init__(
        self,
        tools=None,
        model1: str = "deepseek-ai/DeepSeek-R1",
        model2: str = "deepseek-ai/DeepSeek-V3",
        tools_model="gpt-4o-2024-11-20",
        temp=0.1,
        max_iterations=50,
        verbose=True,
        streaming: bool = True,
        openai_api_key=None,
        api_keys: dict = {},
        file_path: str = r"...",
        image_path: str = r"...",
    ):
        """Initialize the agent with a planning LLM (llm1) and a solving LLM (llm2)."""
        self.file_path = file_path
        self.image_path = image_path
        load_dotenv()
        try:
            self.llm1 = _make_llm(model1, temp, openai_api_key, streaming)
            self.llm2 = _make_llm(model2, temp, openai_api_key, streaming)
        except ValidationError:
            raise ValueError("Invalid OpenAI API key")

        if tools is None:
            api_keys["OPENAI_API_KEY"] = 'sk-itPrztYm9F6XZZpsBMJB9O7Vq0pYUABVVBSoThuBxEGTnDik'
            tools_llm = _make_llm(tools_model, temp, openai_api_key, streaming)
            tools = make_tools(tools_llm, api_keys=api_keys, verbose=verbose,
                               image_path=image_path, file_path=file_path)

        # Executor 1: single planning iteration with the reasoning model.
        self.agent_executor1 = RetryAgentExecutor.from_agent_and_tools(
            tools=tools,
            agent=ChatZeroShotAgent.from_llm_and_tools(
                self.llm1,
                tools,
                suffix=SUFFIX,
                format_instructions=FORMAT_INSTRUCTIONS,
                question_prompt=QUESTION_PROMPT1,
            ),
            verbose=True,
            max_iterations=1,
            return_intermediate_steps=True,
            handle_parsing_errors=True,
        )
        # Executor 2: full tool-using run, seeded with the plan from executor 1.
        self.agent_executor2 = RetryAgentExecutor.from_agent_and_tools(
            tools=tools,
            agent=ChatZeroShotAgent.from_llm_and_tools(
                self.llm2,
                tools,
                suffix=SUFFIX,
                format_instructions=FORMAT_INSTRUCTIONS,
                question_prompt=QUESTION_PROMPT,
            ),
            verbose=True,
            max_iterations=max_iterations,
            return_intermediate_steps=True,
            handle_parsing_errors=True,
        )

    def run(self, prompt):
        # First pass: one planning step; keep only the Thought text before "Action:".
        outputs = self.agent_executor1.invoke({"input": prompt})
        if outputs["intermediate_steps"] == []:
            prompt = str(' ' + outputs["input"] + ' ' + outputs["output"].split('Action:')[0])
        else:
            prompt = str(' ' + outputs["input"] + ' ' + outputs["intermediate_steps"][0][0].log.split('Action:')[0])
        # Second pass: solve the augmented prompt with the full executor.
        outputs = self.agent_executor2.invoke({"input": prompt})
        return outputs


if __name__ == '__main__':
    chem_model = TeLLAgent(temp=0.1, streaming=False,
                           openai_api_key=r'sk-itPrztYm9F6XZZpsBMJB9O7Vq0pYUABVVBSoThuBxEGTnDik',
                           image_path=r"C:\Users\BM109X32G-10GPU-02\Pictures\1735356359936.jpg")
    chem_model.run(r"""what is Y20""")
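
For orientation, run() is a two-pass scheme: the single-iteration executor drafts a plan, and only the Thought text before the first "Action:" is spliced into the prompt handed to the full executor. A minimal sketch of that splice, with hypothetical output values:

# Sketch of the prompt splice in TeLLAgent.run (all values hypothetical).
outputs = {
    "input": "what is Y20",
    "output": "Thought: Y20 is likely an acceptor molecule...\nAction: WebSearch",
    "intermediate_steps": [],
}
# With no intermediate steps, only the text before "Action:" is kept:
plan = outputs["output"].split("Action:")[0]
prompt2 = ' ' + outputs["input"] + ' ' + plan
# prompt2 == " what is Y20 Thought: Y20 is likely an acceptor molecule...\n"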
app.py ADDED
@@ -0,0 +1,219 @@

import os
import asyncio

# Init with fake key so imports don't fail before the user supplies a real one
if 'OPENAI_API_KEY' not in os.environ:
    os.environ['OPENAI_API_KEY'] = 'none'
if os.name == 'nt':
    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())

import base64
import tempfile
from io import BytesIO

import openai
import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from IPython.core.display import HTML
from PIL import Image
from langchain_openai import ChatOpenAI, OpenAI

from agent import TeLLAgent, make_tools
from streamlit_callback_handler import StreamlitCallbackHandlerChem


def convert_to_base64(pil_image):
    """Encode a PIL image as a base64 PNG string."""
    buffered = BytesIO()
    pil_image.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return img_str


def oai_key_isvalid(api_key):
    """Check whether a given OpenAI key is valid by issuing a test completion."""
    try:
        llm = ChatOpenAI(openai_api_key=api_key, base_url="https://www.dmxapi.com/v1/")
        llm.invoke("This is a test")
        return True
    except Exception:
        return False


load_dotenv()
ss = st.session_state
ss.prompt = None

# Set width of sidebar
st.markdown(
    """
    <style>
    [data-testid="stSidebar"][aria-expanded="true"]{
        min-width: 450px;
        max-width: 450px;
    }
    </style>
    """,
    unsafe_allow_html=True,
)


def instantiate_agent(model, file_path='...', image_path='...'):
    ss.agent = TeLLAgent(
        model1=model,
        model2=model,
        tools_model=model,
        temp=0.1,
        openai_api_key=ss.get('api_key'),
        file_path=file_path,
        image_path=image_path,
    )
    return ss.agent


instantiate_agent('gpt-4o-2024-11-20')
tools = ss.agent.agent_executor2.tools

tool_list = pd.Series(
    {f"✅ {t.name}": t.description for t in tools}
).reset_index()
tool_list.columns = ['Tool', 'Description']


def on_api_key_change():
    api_key = ss.get('api_key') or os.getenv('OPENAI_API_KEY')
    # Check if key is valid
    if not oai_key_isvalid(api_key):
        st.write("Please input a valid OpenAI API key.")


def run_prompt(prompt, file_path='...', image_path='...'):
    agent = instantiate_agent(ss.get('model_select'), file_path=file_path, image_path=image_path)
    st.chat_message("user").write(prompt)
    with st.chat_message("assistant"):
        try:
            response = agent.run(prompt)
            if ss.get('file_type') == 'CSV (.csv)':
                try:
                    fx = pd.DataFrame(list(response))
                    st.markdown(":red[Prediction finished!]")
                    st.download_button("⬇️ Download the predicted files as .csv",
                                       fx.to_csv(), "predict results.csv",
                                       use_container_width=True)
                except Exception:
                    st.write(response)
            else:
                st.write(response)
        except openai.AuthenticationError:
            st.write("Please input a valid OpenAI API key")
        except openai.APIError:
            # Handle specific API errors here
            print("OpenAI API error, please try again!")


pre_prompts = [
    'Who are you?',
    'The history and development of Y6',
    'Predict the LogP of Y6',
    'Generate a donor material with PCE = 10',
]

# sidebar
with st.sidebar:
    st.header("🤖 :blue[TeLLAgent]")
    # Input OpenAI API key
    st.text_input(
        'Input your OpenAI API key.',
        placeholder='Input your OpenAI API key.',
        type='password',
        key='api_key',
        on_change=on_api_key_change,
        label_visibility="collapsed",
    )

    # Select model to use
    st.selectbox(
        'Select model to use',
        ['gpt-4o-2024-11-20', 'deepseek-v3', 'gpt-4o-mini'],
        key='model_select',
    )

    # Display prompt examples
    st.markdown('# What can I ask?')
    cols = st.columns(2)
    with cols[0]:
        st.button(
            r'👑 Who are you ? 🧨',
            on_click=lambda: run_prompt(pre_prompts[0]),
        )
        st.button(
            r'📚 The history and development of Y6',
            on_click=lambda: run_prompt(pre_prompts[1]),
        )
    with cols[1]:
        st.button(
            r'🎄 Predict the LogP of Y6',
            on_click=lambda: run_prompt(pre_prompts[2]),
        )
        st.button(
            r'💎 Generate a donor material with PCE = 10',
            on_click=lambda: run_prompt(pre_prompts[3]),
        )

    st.selectbox(
        'Select the file type',
        ['None', 'CSV (.csv)', 'Figure (.jpg, .png, .jpeg)', 'PDF (.pdf)'],
        key='file_type',
    )
    uploaded_file = None
    if ss.get('file_type') == 'Figure (.jpg, .png, .jpeg)':
        uploaded_file = st.file_uploader("Choose a Figure", type=["jpg", "jpeg", "png"])
    if ss.get('file_type') == 'PDF (.pdf)':
        uploaded_file = st.file_uploader("Choose a PDF file")
    if ss.get('file_type') == 'CSV (.csv)':
        uploaded_file = st.file_uploader("Choose a csv file", type='csv')

    # Display available tools
    st.markdown(f"# {len(tool_list)} available tools")
    st.dataframe(
        tool_list,
        use_container_width=True,
        hide_index=True,
        height=200,
    )

# Execute agent on user input
if prompt := st.chat_input("Say something and/or attach files"):
    if uploaded_file is not None:
        if ss.get('file_type') == 'CSV (.csv)':
            # Persist the upload to a temporary file and pass its path to the agent
            with tempfile.NamedTemporaryFile(suffix='.csv', delete=False) as f:
                f.write(uploaded_file.read())
            run_prompt(prompt + ' ' + str(f.name), file_path=f.name)

        if ss.get('file_type') == 'Figure (.jpg, .png, .jpeg)':
            st.image(uploaded_file, width=500)
            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp:
                img_str = base64.b64encode(uploaded_file.getvalue()).decode("utf-8")
                temp.write(base64.b64decode(img_str))
            run_prompt(prompt + ' ' + str(temp.name), image_path=temp.name)

        if ss.get('file_type') == 'PDF (.pdf)':
            with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f:
                f.write(uploaded_file.read())
            run_prompt(prompt, file_path=f.name)

        # with open("input.png", "wb") as af:
        #     img_str = base64.b64encode(files.getvalue()).decode("utf-8")
        #     af.write(base64.b64decode(img_str))
        #     run_prompt(prompt.text + str(f.name), image_path=f.name)
        # except:
        #     st.markdown("Please input correct files or query ")
    else:
        run_prompt(prompt)
prompts.py ADDED
@@ -0,0 +1,110 @@

# flake8: noqa
PREFIX = """
You are an AI system called TeLLAgent and your task is to respond to the question or
solve the problem to the best of your ability using the provided tools.
"""

FORMAT_INSTRUCTIONS = """
You can only respond with a single complete
"Thought, Action, Action Input" format
OR a single "Final Answer" format.

Complete format:

Thought: (reflect on your progress and decide what to do next)
Action: (the action name, should be one of [{tool_names}])
Action Input: (the input string to the action)

OR

Final Answer: (the final answer to the original input question)
"""

QUESTION_PROMPT1 = """
Give a detailed step-by-step solution to the question below using the following tools.
Do not simplify the step descriptions of the process.

{tool_strings}

Use the tools provided, using the most specific tool available for each action.
Your final answer should contain all information necessary to answer the question and subquestions.

IMPORTANT: Your first step is to check the following:

1. Does the question contain the name of a molecule, a CAS number, or a molecular graph?
If so, as a first step, consider whether you need to convert the graph to SMILES using graphconverter, the name using Mol2SMILES, or the CAS number using Query2SMILES.

2. Were you asked to predict the power conversion efficiency (PCE)?
If so, you are only allowed to choose one of the following tools:
acceptor_predictor to predict the PCE of one acceptor molecule
donor_predictor to predict the PCE of one donor molecule
dap_predictor should be used when both the donor and acceptor molecule are provided

3. Is the question about image, figure, graph, or paper files?
If so, the papers or images have already been provided or referenced in some way.
You should use ImageAnalysis or pdfreader to solve the question.
Do not use other tools.

4. Were you asked to answer questions that require technical or general information?
If so, you should combine the results from WebSearch, wikipedia, and the rag tool.

5. When you use the rag tool, do not process the answer; return the results directly.

6. Do you need to work with images? You need to distinguish between the two tools ImageAnalysis and graphconverter:
if you want the SMILES of molecules, choose graphconverter; if you want to analyze or read images, use ImageAnalysis.

Question: {input}
"""

QUESTION_PROMPT = """
Answer the question below using the following tools:

{tool_strings}

Use the tools provided, using the most specific tool available for each action.
Your final answer should contain all information necessary to answer the question and subquestions.

IMPORTANT: Your first step is to check the following:
1. Do you need to convert a molecular graph, name, or CAS number to SMILES?
If so, as a first step, use the graphconverter, Query2SMILES(chemspace_api_key), or Query2CAS() tools.

2. Were you asked to predict the power conversion efficiency (PCE)?
If so, as a first step, consider whether you need to convert the graph, name, or CAS number to SMILES.
Then, as a second step, you are only allowed to choose one of the following tools:
acceptor_predictor to predict the PCE of an acceptor molecule
donor_predictor to predict the PCE of a donor molecule

3. Were you asked to generate an answer according to image or paper files?
If so, the papers or images have already been provided or referenced in some way.
You are only allowed to output the answer by using ImageAnalysis and pdfreader.
Do not use other tools.

4. Were you asked to answer questions that require a technical or general inquiry about a term or concept?
If so, use the rag tool first, then WebSearch, and finally wikipedia.

5. When you use the rag tool, do not process the answer; return the results directly.
Question: {input}
"""

SUFFIX = """

Thought: {agent_scratchpad}
"""
FINAL_ANSWER_ACTION = "Final Answer:"


REPHRASE_TEMPLATE = """In this exercise you will assume the role of a scientific assistant named TeLLAgent. Your task is to answer the provided question as best as you can, based on the provided solution draft.
The solution draft follows the format "Thought, Action, Action Input, Observation", where the 'Thought' statements describe a reasoning sequence. The rest of the text is information obtained to complement the reasoning sequence, and it is 100% accurate.
Your task is to write an answer to the question based on the solution draft, and the following guidelines:
You need to be as detailed as possible in your answers to the questions and reduce the processing of the tool's output.
Output the results directly when using the ImageAnalysis, codewriter, pdfreader, and rag tools, without further processing.

The text should have an educative and assistant-like tone, be accurate, follow the same reasoning sequence as the solution draft, and explain how any conclusion is reached.
Question: {question}

Solution draft: {agent_ans}

Answer:
"""
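
To illustrate the protocol FORMAT_INSTRUCTIONS enforces, here is a hypothetical single turn (the tool name and molecule are illustrative, not output from the system):

Thought: The question names a molecule, so I should first convert the name to SMILES.
Action: Mol2SMILES
Action Input: Y6

and, once the observations suffice:

Final Answer: Y6 is a non-fullerene acceptor; its SMILES string is ...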
requirement.txt ADDED
@@ -0,0 +1,22 @@

paper-scraper @ git+https://github.com/blackadad/paper-scraper.git
numpy==1.26.4
rdkit==2023.9.5
streamlit
google-search-results==2.4.2
python-dotenv
wikipedia
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1
torchvision==0.19.1
torchaudio==2.4.1
pydantic==2.10.4
wandb
langchain==0.3.19
langchain-core==0.3.39
langchain-community==0.3.18
langchain-openai==0.3.1
typing-inspect==0.8.0
typing_extensions==4.12.2
paper-qa
pandas
molsets
scipy==1.15.2
selfies==2.2.0
browser-use
search.py ADDED
@@ -0,0 +1,32 @@

import asyncio

from dotenv import load_dotenv
from langchain.tools import BaseTool
from langchain_openai import ChatOpenAI

from browser_use import Agent

load_dotenv()


class WebUse(BaseTool):
    """Tool that drives a browser-use agent to search the web for a query."""

    name: str = "webuse"
    description: str = ("Calling the browser to search for information. "
                        "Input a query; returns the search results.")

    def __init__(self):
        super().__init__()

    def _run(self, task: str) -> str:
        """Use the tool synchronously by driving the async browser agent."""
        return asyncio.run(self._arun(task))

    async def _arun(self, task: str) -> str:
        """Use the tool asynchronously."""
        agent = Agent(
            task=task,
            llm=ChatOpenAI(model="gpt-4o-2024-11-20",
                           api_key='sk-itPrztYm9F6XZZpsBMJB9O7Vq0pYUABVVBSoThuBxEGTnDik',
                           base_url="https://www.dmxapi.com/v1"),
        )
        result = await agent.run()
        return result
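
A minimal usage sketch, assuming a browser that browser-use can drive is installed (the query string is illustrative):

# Hypothetical usage of the webuse tool.
if __name__ == '__main__':
    tool = WebUse()
    print(tool.run("search for recent reviews of non-fullerene acceptors"))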
streamlit_callback_handler.py ADDED
@@ -0,0 +1,204 @@

from typing import Any, Dict, List, Optional

import requests
from langchain.callbacks.streamlit.streamlit_callback_handler import (
    LLMThought,
    LLMThoughtLabeler,
    LLMThoughtState,
    StreamlitCallbackHandler,
    ToolRecord,
)
from langchain_core.agents import AgentAction, AgentFinish
from streamlit.delta_generator import DeltaGenerator

from utils import is_smiles


def cdk(smiles):
    """Get an SVG depiction of a SMILES string from the CDK Depict web service."""
    url = "https://www.simolecule.com/cdkdepict/depict/wob/svg"
    headers = {"Content-Type": "application/json"}
    response = requests.get(
        url,
        headers=headers,
        params={
            "smi": smiles,
            "annotate": "colmap",
            "zoom": 2,
            "w": 150,
            "h": 80,
            "abbr": "off",
        },
    )
    return response.text


class LLMThoughtChem(LLMThought):
    def __init__(
        self,
        parent_container: DeltaGenerator,
        labeler: LLMThoughtLabeler,
        expanded: bool,
        collapse_on_complete: bool,
    ):
        super().__init__(
            parent_container,
            labeler,
            expanded,
            collapse_on_complete,
        )

    def on_tool_end(
        self,
        output: str,
        color: Optional[str] = None,
        observation_prefix: Optional[str] = None,
        llm_prefix: Optional[str] = None,
        output_ph: dict = {},
        input_tool: str = "",
        serialized: dict = {},
        **kwargs: Any,
    ) -> None:
        # Depending on the tool name, decide what to display.
        if serialized["name"] == "Name2SMILES":
            safe_smiles = output.replace("[", "\\[").replace("]", "\\]")
            if is_smiles(output):
                self._container.markdown(
                    f"**{safe_smiles}**{cdk(output)}", unsafe_allow_html=True
                )

        if serialized["name"] == "ReactionPredict":
            rxn = f"{input_tool}>>{output}"
            safe_smiles = rxn.replace("[", "\\[").replace("]", "\\]")
            self._container.markdown(
                f"**{safe_smiles}**{cdk(rxn)}", unsafe_allow_html=True
            )

        if serialized["name"] == "ReactionRetrosynthesis":
            output = output.replace("[", "\\[").replace("]", "\\]")

    def on_tool_start(
        self, serialized: Dict[str, Any], input_str: str, **kwargs: Any
    ) -> None:
        # Called with the name of the tool we're about to run (in `serialized["name"]`),
        # and its input. We change our container's label to be the tool name.
        self._state = LLMThoughtState.RUNNING_TOOL
        tool_name = serialized["name"]
        self._last_tool = ToolRecord(name=tool_name, input_str=input_str)
        self._container.update(
            new_label=(
                self._labeler.get_tool_label(self._last_tool, is_complete=False)
                .replace("[", "\\[")
                .replace("]", "\\]")
            )
        )

        # Display a note that execution may take a while
        if serialized["name"] in ("ReactionRetrosynthesis", "LiteratureSearch"):
            self._container.markdown(
                "‼️ Note: This tool can take some time to complete execution ‼️",
                unsafe_allow_html=True,
            )

    def complete(self, final_label: Optional[str] = None) -> None:
        """Finish the thought."""
        if final_label is None and self._state == LLMThoughtState.RUNNING_TOOL:
            assert (
                self._last_tool is not None
            ), "_last_tool should never be null when _state == RUNNING_TOOL"
            final_label = self._labeler.get_tool_label(
                self._last_tool, is_complete=True
            )
        self._state = LLMThoughtState.COMPLETE

        if final_label is not None:
            final_label = final_label.replace("[", "\\[").replace("]", "\\]")
        if self._collapse_on_complete:
            self._container.update(new_label=final_label, new_expanded=False)
        else:
            self._container.update(new_label=final_label)


class StreamlitCallbackHandlerChem(StreamlitCallbackHandler):
    def __init__(
        self,
        parent_container: DeltaGenerator,
        *,
        max_thought_containers: int = 4,
        expand_new_thoughts: bool = True,
        collapse_completed_thoughts: bool = True,
        thought_labeler: Optional[LLMThoughtLabeler] = None,
        output_placeholder: dict = {},
    ):
        super(StreamlitCallbackHandlerChem, self).__init__(
            parent_container,
            max_thought_containers=max_thought_containers,
            expand_new_thoughts=expand_new_thoughts,
            collapse_completed_thoughts=collapse_completed_thoughts,
            thought_labeler=thought_labeler,
        )

        self._output_placeholder = output_placeholder
        self.last_input = ""

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        if self._current_thought is None:
            self._current_thought = LLMThoughtChem(
                parent_container=self._parent_container,
                expanded=self._expand_new_thoughts,
                collapse_on_complete=self._collapse_completed_thoughts,
                labeler=self._thought_labeler,
            )

        self._current_thought.on_llm_start(serialized, prompts)

        # We don't prune_old_thought_containers here, because our container won't
        # be visible until it has a child.

    def on_tool_start(
        self, serialized: Dict[str, Any], input_str: str, **kwargs: Any
    ) -> None:
        self._require_current_thought().on_tool_start(serialized, input_str, **kwargs)
        self._prune_old_thought_containers()
        self._last_input = input_str
        self._serialized = serialized

    def on_tool_end(
        self,
        output: str,
        color: Optional[str] = None,
        observation_prefix: Optional[str] = None,
        llm_prefix: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        self._require_current_thought().on_tool_end(
            output,
            color,
            observation_prefix,
            llm_prefix,
            output_ph=self._output_placeholder,
            input_tool=self._last_input,
            serialized=self._serialized,
            **kwargs,
        )
        self._complete_current_thought()

    def on_agent_finish(
        self, finish: AgentFinish, color: Optional[str] = None, **kwargs: Any
    ) -> None:
        if self._current_thought is not None:
            self._current_thought.complete(
                self._thought_labeler.get_final_agent_thought_label()
                .replace("[", "\\[")
                .replace("]", "\\]")
            )
            self._current_thought = None
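
A minimal wiring sketch, assuming the handler is attached the same way as the stock StreamlitCallbackHandler (the executor call is illustrative):

# Hypothetical wiring of the chem-aware callback handler into a chat message.
import streamlit as st
from streamlit_callback_handler import StreamlitCallbackHandlerChem

with st.chat_message("assistant"):
    st_cb = StreamlitCallbackHandlerChem(st.container(), output_placeholder={})
    # outputs = agent_executor.invoke({"input": prompt}, {"callbacks": [st_cb]})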
test.py ADDED
@@ -0,0 +1,262 @@

# -*- coding: utf-8 -*-
"""
Created on Tue Mar 25 16:09:34 2025

@author: BM109X32G-10GPU-02
"""
import os
import sys
import asyncio

import streamlit as st
from dotenv import load_dotenv

# Ensure the local repository (browser_use) is accessible
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from browser_use import Agent
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.controller.service import Controller
from langchain_openai import ChatOpenAI

# Load environment variables
load_dotenv()

if os.name == 'nt':
    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())


def get_llm(provider: str):
    """Return the LLM client for the selected provider."""
    if provider == 'anthropic':
        api_key = 'sk-itPrztYm9F6XZZpsBMJB9O7Vq0pYUABVVBSoThuBxEGTnDik'
        if not api_key:
            st.error("Error: ANTHROPIC_API_KEY is not set. Please provide a valid API key.")
            st.stop()
        return ChatOpenAI(openai_api_key=api_key, base_url="https://www.dmxapi.com/v1/")
    elif provider == 'openai':
        api_key = 'sk-itPrztYm9F6XZZpsBMJB9O7Vq0pYUABVVBSoThuBxEGTnDik'
        if not api_key:
            st.error("Error: OPENAI_API_KEY is not set. Please provide a valid API key.")
            st.stop()
        return ChatOpenAI(openai_api_key=api_key, base_url="https://www.dmxapi.com/v1/")
    else:
        st.error(f'Unsupported provider: {provider}')
        st.stop()


def initialize_agent(query: str, provider: str):
    """Build the browser-use agent and its browser instance."""
    llm = get_llm(provider)
    controller = Controller()
    browser = Browser(config=BrowserConfig())

    return Agent(
        task=query,
        llm=llm,
        controller=controller,
        browser=browser,
        use_vision=True,
        max_actions_per_step=1,
    ), browser


# Streamlit UI
st.title("Automated Browser Agent with LLMs 🤖")
if st.button("Foo"):
    st.session_state.value = "Foo"
    st.rerun()
query = st.text_input("Enter your query:", "go to reddit and search for posts about browser-use")
provider = st.radio("Select LLM Provider:", ["openai", "anthropic"], index=0)

if st.button("Run Agent"):
    st.write("Initializing agent...")
    agent, browser = initialize_agent(query, provider)

    async def run_agent():
        with st.spinner("Running automation..."):
            await agent.run(max_steps=25)
        st.success("Task completed! 🎉")

    asyncio.run(run_agent())

    st.button("Close Browser", on_click=lambda: asyncio.run(browser.close()))

if "value" not in st.session_state:
    st.session_state.value = "Title"

##### Option using st.rerun #####
st.header(st.session_state.value)
+ # from langchain.chains import LLMChain, SimpleSequentialChain, RetrievalQA, ConversationalRetrievalChain
93
+
94
+ # from langchain import PromptTemplate
95
+
96
+ # from langchain.tools import BaseTool
97
+
98
+ # from langchain_core.messages import HumanMessage, SystemMessage
99
+ # from langchain.base_language import BaseLanguageModel
100
+ # from langchain.text_splitter import CharacterTextSplitter
101
+
102
+
103
+ # from langchain_community.document_loaders import PyPDFLoader
104
+ # from langchain_community.vectorstores import FAISS
105
+ # from langchain_openai import ChatOpenAI
106
+ # from langchain_openai import OpenAIEmbeddings
107
+
108
+ # from langchain_community.document_loaders import PyPDFLoader
109
+ # import streamlit as st
110
+ # import pandas as pd
111
+ # import base64
112
+
113
+ # from langchain_community.embeddings import OllamaEmbeddings
114
+ # from langchain.tools import BaseTool
115
+ # from langchain_openai import ChatOpenAI
116
+ # from langchain_core.messages import HumanMessage, SystemMessage
117
+ # from langchain.base_language import BaseLanguageModel
118
+ # import base64
119
+ # from io import BytesIO
120
+ # from PIL import Image
121
+ # from langchain_community.embeddings import OllamaEmbeddings
122
+ # from langchain.tools import BaseTool
123
+ # from langchain_openai import ChatOpenAI
124
+ # from langchain_core.messages import HumanMessage, SystemMessage
125
+ # from langchain.base_language import BaseLanguageModel
126
+
127
+
128
+ # from tempfile import NamedTemporaryFile
129
+ # import streamlit as st
130
+
131
+ # uploaded_file = st.file_uploader("File upload")
132
+
133
+ # def convert_to_base64(pil_image):
134
+ # buffered = BytesIO()
135
+ # pil_image.save(buffered, format="PNG")
136
+ # img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
137
+ # return img_str
138
+
139
+ # prompt = st.chat_input(
140
+ # "Say something and/or attach an image",
141
+ # accept_file=True,
142
+
143
+ # )
144
+ # uploaded_file = st.file_uploader("Choose a file")
145
+ # template = """
146
+
147
+ # You are an expert chemist and your task is to respond to the question or
148
+ # solve the problem to the best of your ability. You need to answer in as much detail as possible.
149
+ # You can only respond with a single "Final Answer" format.
150
+ # Use the following pieces of context to answer the question at the end.
151
+ # If you don't know the answer, just say that you don't know, don't try to make up an answer.
152
+ # <context>
153
+ # {context}
154
+ # </context>
155
+
156
+ # Question: {question}
157
+ # Answer:
158
+
159
+ # """
160
+
161
+ # import tempfile
162
+ # import base64
163
+ # from io import BytesIO
164
+ # from PIL import Image
165
+
166
+
167
+ # if uploaded_file is None:
168
+ # st.markdown(prompt)
169
+ # if uploaded_file is not None:
170
+ # # try:
171
+
172
+ # # loader = PyPDFLoader(uploaded_file)
173
+ # # documents = loader.load()
174
+ # # st.markdown(prompt)
175
+ # # except:
176
+ # # #try:
177
+
178
+ # # file = pd.read_csv(uploaded_file)
179
+ # # st.markdown(prompt)
180
+ # #
181
+ # # with tempfile.NamedTemporaryFile( dir = 'j:/', suffix ='.png' ,delete=False) as f:
182
+ # with tempfile.NamedTemporaryFile(dir = 'j:/',delete=False, suffix=".png") as temp_file:
183
+
184
+ # mg_str = base64.b64encode(uploaded_file.getvalue()).decode("utf-8")
185
+
186
+
187
+ # temp_file.write(base64.b64decode(mg_str))
188
+
189
+ # # image = Image.new('RGB', (100, 100), color='blue')
190
+ # # image.save(uploaded_file.getvalue(), format='PNG')
191
+ # # loaded_image = Image.open(f.name)
192
+
193
+ # pil_image = Image.open(temp_file.name)
194
+ # rgb_im = pil_image.convert('RGB')
195
+ # image_b64 = convert_to_base64(pil_image)
196
+ # query = 'what can you see in the image'
197
+ # llm = ChatOpenAI(model="gpt-4o-2024-11-20",api_key='sk-itPrztYm9F6XZZpsBMJB9O7Vq0pYUABVVBSoThuBxEGTnDik',
198
+ # base_url="https://www.dmxapi.com/v1")
199
+ # message = HumanMessage(
200
+ # content=[
201
+ # {"type": "text", "text": query},
202
+ # {
203
+ # "type": "image_url",
204
+ # "image_url": {"url":f"data:image/jpeg;base64,{image_b64}"},
205
+ # },
206
+ # ],)
207
+
208
+ # response = llm.invoke([message])
209
+ # st.markdown(response.content)
210
+
211
+ # # mg_str = base64.b64encode(files.getvalue()).decode("utf-8")
212
+
213
+ # # img_str = base64.b64encode(files.getvalue()).decode("utf-8")
214
+ # # # image_b64 = convert_to_base64(files.getvalue())
215
+
216
+ # #
217
+ # # st.markdown(query)
218
+ # # st.markdown(response.content)
219
+
220
+
221
+
222
+
223
+
224
+
225
+
226
+
227
+
228
+
229
+
230
+
231
+
232
+
233
+
234
+
235
+
236
+ # with open("input.pdf","wb") as f:
237
+ # base64_pdf = base64.b64encode(prompt["files"][0].read()).decode('utf-8')
238
+ # f.write(base64.b64decode(base64_pdf))
239
+ # loader = PyPDFLoader(f.name)
240
+ # documents = loader.load()
241
+
242
+ # text_splitter = CharacterTextSplitter(chunk_size=6000, chunk_overlap=1000)
243
+ # docs = text_splitter.split_documents(documents)
244
+ # embeddings = OpenAIEmbeddings(model="text-embedding-3-large",api_key='sk-itPrztYm9F6XZZpsBMJB9O7Vq0pYUABVVBSoThuBxEGTnDik',
245
+ # base_url="https://www.dmxapi.com/v1")
246
+
247
+
248
+ # vectorstore = FAISS.from_documents(docs, embeddings)
249
+ # prompt = PromptTemplate(template=template, input_variables=[ "question"])
250
+ # qa_chain = RetrievalQA.from_chain_type(
251
+ # llm= ChatOpenAI(model="gpt-4o-2024-11-20",api_key='sk-itPrztYm9F6XZZpsBMJB9O7Vq0pYUABVVBSoThuBxEGTnDik',
252
+ # base_url="https://www.dmxapi.com/v1"),
253
+ # chain_type="stuff",
254
+ # retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
255
+ # return_source_documents=True,
256
+ # chain_type_kwargs={"prompt": prompt},
257
+ # )
258
+
259
+ # result = qa_chain.invoke('what is the main point')
260
+ # st.markdown(result['result'])
261
+ # a = pd.read_csv(prompt["files"][0] )
262
+ # st.markdown(a)
tools.py ADDED
@@ -0,0 +1,70 @@

import os

from langchain_community.agent_toolkits.load_tools import load_tools
from langchain.base_language import BaseLanguageModel

from tool import *


def make_tools(llm: BaseLanguageModel, api_keys: dict = {}, verbose=True,
               image_path=None, file_path=None):
    """Assemble the tool list for the agent, keyed to the available API keys."""
    serp_api_key = api_keys.get("SERP_API_KEY") or os.getenv("SERP_API_KEY")
    openai_api_key = api_keys.get("OPENAI_API_KEY") or os.getenv("OPENAI_API_KEY")
    chemspace_api_key = api_keys.get("CHEMSPACE_API_KEY") or os.getenv(
        "CHEMSPACE_API_KEY"
    )
    semantic_scholar_api_key = api_keys.get("SEMANTIC_SCHOLAR_API_KEY") or os.getenv(
        "SEMANTIC_SCHOLAR_API_KEY"
    )
    serp_api_key = '3795acda6a74ea15033d34b54eac82982b26f559147d9cf04aca4bfca91c3e9d'
    all_tools = load_tools(
        [
            # "python_repl",
            # "ddg-search",
            "wikipedia",
            # "human"
        ]
    )

    all_tools += [
        browseruse(),
        rag(),
        codewriter(),
        Query2SMILES(chemspace_api_key),
        Mol2SMILES(chemspace_api_key),
        Query2CAS(),
        SMILES2Name(),
        SMILES2SAScore(),
        SMILES2LogP(),
        SMILES2Properties(),
        MolSimilarity(),
        SMILES2Weight(),
        FuncGroups(),
        donor_predictor(),
        acceptor_predictor(),
        homolumo_predictor(),
        dap_screen(),
        graphconverter(),
        molgen(),
        dap_predictor(),
        Scholar2ResultLLM(
            llm=llm,
            openai_api_key=openai_api_key,
            semantic_scholar_api_key=semantic_scholar_api_key,
        ),
    ]

    if serp_api_key:
        all_tools += [WebSearch(serp_api_key)]
    if image_path is not None:
        all_tools += [Imageanalysis(image_path)]
    if file_path is not None:
        all_tools += [pdfreader(file_path)]

    return all_tools
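
A quick smoke test, assuming the `tool` module and its model weights are on the path (the model name is the one the app already uses):

# Hypothetical smoke test for make_tools.
if __name__ == '__main__':
    from langchain_openai import ChatOpenAI
    llm = ChatOpenAI(model="gpt-4o-2024-11-20")
    tools = make_tools(llm, api_keys={}, verbose=True)
    print([t.name for t in tools])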
utils.py ADDED
@@ -0,0 +1,159 @@

import re

import requests
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem

from urllib.request import urlopen
from urllib.parse import quote


def is_smiles(text):
    """Return True if the text parses as a SMILES string."""
    try:
        m = Chem.MolFromSmiles(text, sanitize=False)
        if m is None:
            return False
        return True
    except Exception:
        return False


def is_multiple_smiles(text):
    if is_smiles(text):
        return "." in text
    return False


def split_smiles(text):
    return text.split(".")


def is_cas(text):
    pattern = r"^\d{2,7}-\d{2}-\d$"
    return re.match(pattern, text) is not None


def largest_mol(smiles):
    """Return the largest valid fragment of a dot-separated SMILES string."""
    ss = smiles.split(".")
    ss.sort(key=lambda a: len(a))
    while not is_smiles(ss[-1]):
        rm = ss[-1]
        ss.remove(rm)
    return ss[-1]


def canonical_smiles(smiles):
    try:
        smi = Chem.MolToSmiles(Chem.MolFromSmiles(smiles), canonical=True)
        return smi
    except Exception:
        return "Invalid SMILES string"


def tanimoto(s1, s2):
    """Calculate the Tanimoto similarity of two SMILES strings."""
    try:
        mol1 = Chem.MolFromSmiles(s1)
        mol2 = Chem.MolFromSmiles(s2)
        fp1 = AllChem.GetMorganFingerprintAsBitVect(mol1, 2, nBits=2048)
        fp2 = AllChem.GetMorganFingerprintAsBitVect(mol2, 2, nBits=2048)
        return DataStructs.TanimotoSimilarity(fp1, fp2)
    except (TypeError, ValueError, AttributeError):
        return "Error: Not a valid SMILES string"


def CIRconvert(ids):
    """Resolve an identifier to SMILES via the NCI Chemical Identifier Resolver."""
    url = 'http://cactus.nci.nih.gov/chemical/structure/' + quote(ids) + '/smiles'
    ans = urlopen(url).read().decode('utf8')
    return ans


def pubchem_query2smiles(
    query: str,
    url: str = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/{}",
) -> str:
    if is_smiles(query):
        if not is_multiple_smiles(query):
            return query
        else:
            raise ValueError(
                "Multiple SMILES strings detected, input one molecule at a time."
            )
    if url is None:
        url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/{}"
    r = requests.get(url.format(query, "property/IsomericSMILES/JSON"))
    # convert the response to a json object
    data = r.json()
    try:
        smi = data["PropertyTable"]["Properties"][0]["IsomericSMILES"]
    except KeyError:
        # Fall back to the NCI resolver if PubChem has no entry
        try:
            smi = CIRconvert(query)
        except Exception:
            return "Could not find a molecule matching the text. One possible cause is that the input is incorrect, input one molecule at a time."
    return str(Chem.CanonSmiles(largest_mol(smi)))


def query2cas(query: str, url_cid: str, url_data: str):
    try:
        mode = "name"
        if is_smiles(query):
            if is_multiple_smiles(query):
                raise ValueError(
                    "Multiple SMILES strings detected, input one molecule at a time."
                )
            mode = "smiles"
        url_cid = url_cid.format(mode, query)
        cid = requests.get(url_cid).json()["IdentifierList"]["CID"][0]
        url_data = url_data.format(cid)
        data = requests.get(url_data).json()
    except (requests.exceptions.RequestException, KeyError):
        raise ValueError("Invalid molecule input, no Pubchem entry")

    try:
        for section in data["Record"]["Section"]:
            if section.get("TOCHeading") == "Names and Identifiers":
                for subsection in section["Section"]:
                    if subsection.get("TOCHeading") == "Other Identifiers":
                        for subsubsection in subsection["Section"]:
                            if subsubsection.get("TOCHeading") == "CAS":
                                return subsubsection["Information"][0]["Value"][
                                    "StringWithMarkup"
                                ][0]["String"]
    except KeyError:
        raise ValueError("Invalid molecule input, no Pubchem entry")

    raise ValueError("CAS number not found")


def smiles2name(smi, single_name=True):
    """Query PubChem for a name record (the first non-CAS synonym) for the given SMILES."""
    try:
        smi = Chem.MolToSmiles(Chem.MolFromSmiles(smi), canonical=True)
    except Exception:
        raise ValueError("Invalid SMILES string")
    # query the PubChem database
    r = requests.get(
        "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/"
        + smi
        + "/synonyms/JSON"
    )
    # convert the response to a json object
    data = r.json()
    try:
        if single_name:
            index = 0
            names = data["InformationList"]["Information"][0]["Synonym"]
            while is_cas(name := names[index]):
                index += 1
                if index == len(names):
                    raise ValueError("No name found")
        else:
            name = data["InformationList"]["Information"][0]["Synonym"]
    except KeyError:
        raise ValueError("Unknown Molecule")
    return name
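
A few spot checks of the helpers (the expected values follow from the RDKit calls above; the CAS number shown is ethanol's):

# Hypothetical spot checks for the SMILES helpers.
if __name__ == '__main__':
    print(is_smiles("CCO"))          # True
    print(canonical_smiles("OCC"))   # CCO
    print(tanimoto("CCO", "CCN"))    # a float in [0, 1]
    print(is_cas("64-17-5"))         # True
    print(largest_mol("CCO.C"))      # CCO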