Upload 9 files
Browse files- agent.py +126 -0
- app.py +219 -0
- prompts.py +110 -0
- requirement.txt +22 -0
- search.py +32 -0
- streamlit_callback_handler.py +204 -0
- test.py +262 -0
- tools.py +70 -0
- utils.py +159 -0
agent.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Optional
|
| 2 |
+
|
| 3 |
+
import langchain
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
from langchain_core.prompts import PromptTemplate
|
| 6 |
+
from langchain import chains
|
| 7 |
+
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
| 8 |
+
from pydantic import ValidationError
|
| 9 |
+
from langchain.agents import AgentExecutor
|
| 10 |
+
from langchain.agents.mrkl.base import ZeroShotAgent
|
| 11 |
+
from prompts import FORMAT_INSTRUCTIONS, QUESTION_PROMPT, QUESTION_PROMPT1, SUFFIX
|
| 12 |
+
from tools import make_tools
|
| 13 |
+
|
| 14 |
+
from rmrkl import ChatZeroShotAgent, RetryAgentExecutor
|
| 15 |
+
|
| 16 |
+
import base64
|
| 17 |
+
from io import BytesIO
|
| 18 |
+
from PIL import Image
|
| 19 |
+
from IPython.display import HTML, display
|
| 20 |
+
from langchain_openai import ChatOpenAI , OpenAI
|
| 21 |
+
|
| 22 |
+
def convert_to_base64(pil_image):
    """Encode *pil_image* as a base64 string of its PNG representation."""
    png_buffer = BytesIO()
    # Serialize the image into an in-memory PNG, then base64-encode the bytes.
    pil_image.save(png_buffer, format="PNG")
    return base64.b64encode(png_buffer.getvalue()).decode("utf-8")
|
| 27 |
+
|
| 28 |
+
def _make_llm(model, temp, api_key, streaming: bool = False):
    """Build an LLM client for *model*, routed through the dmxapi proxy.

    Args:
        model: Model identifier; dispatched on its prefix below.
        temp: Sampling temperature passed to the client.
        api_key: OpenAI-compatible API key.
        streaming: Accepted for interface compatibility.
            NOTE(review): not wired into either client here — confirm whether
            streaming should be enabled on the ChatOpenAI instance.

    Returns:
        An ``OpenAI`` or ``ChatOpenAI`` client instance.

    Raises:
        ValueError: If *model* matches none of the supported prefixes.
    """
    if model.startswith("claude") or model.startswith("gpt-3"):
        # NOTE(review): "claude" models go through the OpenAI completion
        # client — presumably the proxy exposes them via an OpenAI-compatible
        # API; confirm against the dmxapi endpoint behavior.
        llm = OpenAI(
            temperature=temp,
            model_name=model,
            max_tokens=5000,
            openai_api_key=api_key,
            base_url="https://www.dmxapi.com/v1",
        )
    elif model.startswith("gpt-4o-2024-11-20") or model.startswith("deepseek"):
        llm = ChatOpenAI(
            model=model,
            # BUGFIX: honor the caller-supplied temperature; the original
            # ignored the ``temp`` parameter and hard-coded 0.1 here.
            temperature=temp,
            timeout=1000,
            callbacks=[StreamingStdOutCallbackHandler()],
            openai_api_key=api_key,
            base_url="https://www.dmxapi.com/v1",
        )
    else:
        raise ValueError(f"Invalid model name: {model}")
    return llm
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
class TeLLAgent:
    """Two-stage tool-using agent.

    ``agent_executor1`` (the planner, ``model1``) runs for a single iteration
    to draft an initial thought; ``run`` then seeds ``agent_executor2`` (the
    solver, ``model2``) with that draft and returns the solver's result.
    """

    def __init__(
        self,
        tools=None,
        model1: str = "deepseek-ai/DeepSeek-R1",
        model2: str = "deepseek-ai/DeepSeek-V3",
        tools_model="gpt-4o-2024-11-20",
        temp=0.1,
        max_iterations=50,
        verbose=True,
        streaming: bool = True,
        openai_api_key=None,
        api_keys: dict = None,
        file_path: str = r"...",
        image_path: str = r"..."
    ):
        """Initialize agent.

        Args:
            tools: Pre-built tool list; when ``None`` the default set is
                created via ``make_tools``.
            model1: Planner model name (see ``_make_llm`` for routing).
            model2: Solver model name.
            tools_model: Model used by tools that need their own LLM.
            temp: Sampling temperature forwarded to ``_make_llm``.
            max_iterations: Iteration cap for the solver executor.
            verbose: Forwarded to ``make_tools``.
            streaming: Forwarded to ``_make_llm``.
            openai_api_key: API key used for every LLM client.
            api_keys: Optional extra per-service keys forwarded to
                ``make_tools``.
            file_path: Default data-file location handed to tools.
            image_path: Default image location handed to tools.

        Raises:
            ValueError: If LLM construction fails API-key validation.
        """
        self.file_path = file_path
        self.image_path = image_path
        load_dotenv()
        # BUGFIX: the original declared ``api_keys: str = {}`` — a wrong
        # annotation and a mutable default shared across all instances.
        if api_keys is None:
            api_keys = {}
        try:
            self.llm1 = _make_llm(model1, temp, openai_api_key, streaming)
            self.llm2 = _make_llm(model2, temp, openai_api_key, streaming)
        except ValidationError:
            raise ValueError("Invalid OpenAI API key")

        if tools is None:
            # SECURITY FIX: the original hard-coded a live API key here.
            # Fall back to the caller-supplied key (load_dotenv above also
            # makes a .env-provided key available to downstream clients).
            if openai_api_key and "OPENAI_API_KEY" not in api_keys:
                api_keys["OPENAI_API_KEY"] = openai_api_key
            tools_llm = _make_llm(tools_model, temp, openai_api_key, streaming)
            tools = make_tools(tools_llm, api_keys=api_keys, verbose=verbose,
                               image_path=image_path, file_path=file_path)

        # Planner: capped at one iteration so it only drafts a first thought.
        self.agent_executor1 = RetryAgentExecutor.from_agent_and_tools(
            tools=tools,
            agent=ChatZeroShotAgent.from_llm_and_tools(
                self.llm1,
                tools,
                suffix=SUFFIX,
                format_instructions=FORMAT_INSTRUCTIONS,
                question_prompt=QUESTION_PROMPT1,
                return_intermediate_steps=True,
                handle_parsing_errors=True,
            ),
            verbose=True,
            max_iterations=1,
            return_intermediate_steps=True,
            handle_parsing_errors=True,
        )
        # Solver: runs the full tool loop, seeded by the planner's draft.
        self.agent_executor2 = RetryAgentExecutor.from_agent_and_tools(
            tools=tools,
            agent=ChatZeroShotAgent.from_llm_and_tools(
                self.llm2,
                tools,
                suffix=SUFFIX,
                format_instructions=FORMAT_INSTRUCTIONS,
                question_prompt=QUESTION_PROMPT,
                return_intermediate_steps=True,
                handle_parsing_errors=True,
            ),
            verbose=True,
            max_iterations=max_iterations,
            return_intermediate_steps=True,
            handle_parsing_errors=True,
        )

    def run(self, prompt):
        """Run the planner, seed the solver with its draft, return the
        solver's outputs dict (deduplicated from the original's two
        copy-pasted invoke branches)."""
        outputs = self.agent_executor1.invoke({"input": prompt})
        if outputs["intermediate_steps"] == []:
            # Planner produced no tool call: reuse its raw output up to the
            # first "Action:" marker as the draft thought.
            draft = outputs["output"].split('Action:')[0]
        else:
            # Planner did act: reuse the log of its first step instead.
            draft = outputs["intermediate_steps"][0][0].log.split('Action:')[0]
        seeded = str(' ' + outputs["input"] + ' ' + draft)
        return self.agent_executor2.invoke({"input": seeded})
|
| 120 |
+
|
| 121 |
+
if __name__ == '__main__':
    import os
    # SECURITY FIX: the original embedded a live API key in source.  Read it
    # from the environment instead (TeLLAgent calls load_dotenv, so a .env
    # file also works).
    chem_model = TeLLAgent(
        temp=0.1,
        streaming=False,
        openai_api_key=os.getenv("OPENAI_API_KEY"),
        image_path=r"C:\Users\BM109X32G-10GPU-02\Pictures\1735356359936.jpg",
    )
    chem_model.run(r"""what is Y20""")
|
app.py
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import asyncio
|
| 3 |
+
# Init with fake key
|
| 4 |
+
if 'OPENAI_API_KEY' not in os.environ:
|
| 5 |
+
os.environ['OPENAI_API_KEY'] = 'none'
|
| 6 |
+
if os.name == 'nt':
|
| 7 |
+
asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
|
| 8 |
+
|
| 9 |
+
import openai
|
| 10 |
+
import pandas as pd
|
| 11 |
+
import streamlit as st
|
| 12 |
+
from IPython.core.display import HTML
|
| 13 |
+
from PIL import Image
|
| 14 |
+
from agent import TeLLAgent, make_tools
|
| 15 |
+
from streamlit_callback_handler import \
|
| 16 |
+
StreamlitCallbackHandlerChem
|
| 17 |
+
import base64
|
| 18 |
+
import pandas as pd
|
| 19 |
+
from dotenv import load_dotenv
|
| 20 |
+
from langchain_openai import ChatOpenAI , OpenAI
|
| 21 |
+
import base64
|
| 22 |
+
from io import BytesIO
|
| 23 |
+
from PIL import Image
|
| 24 |
+
import tempfile
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def convert_to_base64(pil_image):
    """Return the base64-encoded PNG bytes of *pil_image* as a UTF-8 string."""
    # Write the image to an in-memory buffer and encode its contents.
    sink = BytesIO()
    pil_image.save(sink, format="PNG")
    encoded = base64.b64encode(sink.getvalue())
    return encoded.decode("utf-8")
|
| 32 |
+
|
| 33 |
+
def oai_key_isvalid(api_key):
    """Check if a given OpenAI key is valid.

    Sends a one-shot test completion through the proxy endpoint; any failure
    (bad key, network error, missing client, ...) is reported as invalid.

    Returns:
        bool: True when the test call succeeds, False otherwise.
    """
    try:
        llm = ChatOpenAI(openai_api_key = api_key, base_url="https://www.dmxapi.com/v1/")
        # Result is discarded — only success/failure of the call matters.
        llm.invoke("This is a test")
        return True
    except Exception:
        # BUGFIX: narrowed from a bare ``except:`` so KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return False
|
| 41 |
+
|
| 42 |
+
load_dotenv()
|
| 43 |
+
ss = st.session_state
|
| 44 |
+
ss.prompt = None
|
| 45 |
+
|
| 46 |
+
# Set width of sidebar
|
| 47 |
+
st.markdown(
|
| 48 |
+
"""
|
| 49 |
+
<style>
|
| 50 |
+
[data-testid="stSidebar"][aria-expanded="true"]{
|
| 51 |
+
min-width: 450px;
|
| 52 |
+
max-width: 450px;
|
| 53 |
+
}
|
| 54 |
+
""",
|
| 55 |
+
unsafe_allow_html=True,
|
| 56 |
+
)
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def instantiate_agent(model,file_path = '...',
                      image_path ='...'):
    # Build a fresh TeLLAgent and stash it in Streamlit session state so the
    # rest of the app can reach it via ``ss.agent``.
    # NOTE(review): the TeLLAgent.__init__ visible in agent.py accepts
    # ``model1``/``model2`` but no ``model`` parameter — as written this call
    # looks like it would raise TypeError; confirm against the TeLLAgent
    # version actually deployed.
    ss.agent = TeLLAgent(
        model=model,
        tools_model=model,
        temp=0.1,
        openai_api_key=ss.get('api_key') , file_path = file_path,
        image_path =image_path

    )
    return ss.agent
|
| 70 |
+
|
| 71 |
+
instantiate_agent('gpt-4o-2024-11-20')
|
| 72 |
+
tools = ss.agent.agent_executor.tools
|
| 73 |
+
|
| 74 |
+
tool_list = pd.Series(
|
| 75 |
+
{f"✅ {t.name}":t.description for t in tools}
|
| 76 |
+
).reset_index()
|
| 77 |
+
tool_list.columns = ['Tool', 'Description']
|
| 78 |
+
|
| 79 |
+
def on_api_key_change():
|
| 80 |
+
api_key = ss.get('api_key') or os.getenv('OPENAI_API_KEY')
|
| 81 |
+
|
| 82 |
+
# Check if key is valid
|
| 83 |
+
if not oai_key_isvalid(api_key):
|
| 84 |
+
st.write("Please input a valid OpenAI API key.")
|
| 85 |
+
|
| 86 |
+
def run_prompt(prompt, file_path = '...', image_path = '...'):
    """Execute *prompt* through a freshly built agent and render the result.

    Args:
        prompt: User query (possibly augmented with an uploaded-file path).
        file_path: Path to an uploaded CSV/PDF, or the '...' placeholder.
        image_path: Path to an uploaded image, or the '...' placeholder.
    """
    # A new agent per prompt picks up the currently selected model and files.
    agent = instantiate_agent(ss.get('model_select'),file_path = file_path, image_path =image_path)
    st.chat_message("user").write(prompt)
    with st.chat_message("assistant") :
        try:
            response = agent.run(prompt)
            if ss.get('file_type') == 'CSV (.csv)':
                # CSV jobs: try to tabulate the response and offer a download.
                try:
                    fx = pd.DataFrame(list(response))
                    st.markdown(":red[Prediction finished! ]")
                    st.download_button( "⬇️Download the predicted files as .csv", fx.to_csv(), "predict results.csv", use_container_width=True)
                except Exception:
                    # BUGFIX: narrowed from a bare ``except:``.  If the
                    # response cannot be tabulated, fall back to plain output.
                    st.write(response)
            else:
                st.write(response)
        except openai.AuthenticationError:
            st.write("Please input a valid OpenAI API key")
        except openai.APIError:
            # Handle specific API errors here
            print("OpenAI API error, please try again!")
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
pre_prompts = [
|
| 110 |
+
'Who are you?',
|
| 111 |
+
('The history and development of Y6'
|
| 112 |
+
|
| 113 |
+
),
|
| 114 |
+
(
|
| 115 |
+
'Predict the LogP of Y6'
|
| 116 |
+
),
|
| 117 |
+
'Generate a donor material with PCE = 10'
|
| 118 |
+
]
|
| 119 |
+
|
| 120 |
+
# sidebar
|
| 121 |
+
with st.sidebar:
|
| 122 |
+
|
| 123 |
+
st.header("🤖 :blue[TeLLAgent] ")
|
| 124 |
+
# Input OpenAI api key
|
| 125 |
+
st.text_input(
|
| 126 |
+
'Input your OpenAI API key.',
|
| 127 |
+
placeholder = 'Input your OpenAI API key.',
|
| 128 |
+
type='password',
|
| 129 |
+
key='api_key',
|
| 130 |
+
on_change=on_api_key_change,
|
| 131 |
+
label_visibility="collapsed"
|
| 132 |
+
)
|
| 133 |
+
|
| 134 |
+
# Input model to use
|
| 135 |
+
st.selectbox(
|
| 136 |
+
'Select model to use',
|
| 137 |
+
['gpt-4o-2024-11-20', 'deepseek-v3', 'gpt-4o-mini'],
|
| 138 |
+
key='model_select',
|
| 139 |
+
)
|
| 140 |
+
|
| 141 |
+
# Display prompt examples
|
| 142 |
+
st.markdown('# What can I ask?')
|
| 143 |
+
cols = st.columns(2)
|
| 144 |
+
with cols[0]:
|
| 145 |
+
st.button(
|
| 146 |
+
r'👑 Who are you ? 🧨 ',
|
| 147 |
+
on_click=lambda: run_prompt(pre_prompts[0]),
|
| 148 |
+
)
|
| 149 |
+
st.button(
|
| 150 |
+
r'📚 The history and development of Y6 ',
|
| 151 |
+
on_click=lambda: run_prompt(pre_prompts[1]),
|
| 152 |
+
)
|
| 153 |
+
with cols[1]:
|
| 154 |
+
st.button(
|
| 155 |
+
r"🎄Predict the LogP of Y6 ",
|
| 156 |
+
on_click=lambda: run_prompt(pre_prompts[2]),
|
| 157 |
+
)
|
| 158 |
+
st.button(
|
| 159 |
+
r'💎 Generate a donor material with PCE = 10',
|
| 160 |
+
on_click=lambda: run_prompt(pre_prompts[3]),
|
| 161 |
+
)
|
| 162 |
+
|
| 163 |
+
st.selectbox(
|
| 164 |
+
'Select the file type ',
|
| 165 |
+
['None', 'CSV (.csv)', 'Figure (.jpg, .png, .jpeg)', 'PDF (.pdf)'],
|
| 166 |
+
key='file_type',
|
| 167 |
+
)
|
| 168 |
+
uploaded_file = None
|
| 169 |
+
if ss.get('file_type') == 'Figure (.jpg, .png, .jpeg)':
|
| 170 |
+
uploaded_file = st.file_uploader("Choose a Figure", type = ["jpg", "jpeg", "png"])
|
| 171 |
+
if ss.get('file_type') == 'PDF (.pdf)':
|
| 172 |
+
uploaded_file = st.file_uploader("Choose a PDF file")
|
| 173 |
+
if ss.get('file_type') == 'CSV (.csv)':
|
| 174 |
+
uploaded_file = st.file_uploader("Choose a csv file", type = 'csv')
|
| 175 |
+
|
| 176 |
+
# Display available tools
|
| 177 |
+
st.markdown(f"# {len(tool_list)} available tools")
|
| 178 |
+
st.dataframe(
|
| 179 |
+
tool_list,
|
| 180 |
+
use_container_width=True,
|
| 181 |
+
hide_index=True,
|
| 182 |
+
height=200
|
| 183 |
+
)
|
| 184 |
+
|
| 185 |
+
# Execute agent on user input
|
| 186 |
+
if prompt := st.chat_input("Say something and/or attach files"):
|
| 187 |
+
|
| 188 |
+
if uploaded_file is not None:
|
| 189 |
+
if ss.get('file_type') == 'CSV (.csv)':
|
| 190 |
+
with tempfile.NamedTemporaryFile( dir = 'j:/', suffix ='.csv' ,delete=False) as f:
|
| 191 |
+
f.write(uploaded_file.read())
|
| 192 |
+
run_prompt(prompt + str(' ') + str(f.name), file_path = f.name)
|
| 193 |
+
f.close()
|
| 194 |
+
|
| 195 |
+
if ss.get('file_type') == 'Figure (.jpg, .png, .jpeg)':
|
| 196 |
+
|
| 197 |
+
st.image(uploaded_file, width = 500)
|
| 198 |
+
with tempfile.NamedTemporaryFile(dir = 'j:/',delete=False, suffix=".png") as temp:
|
| 199 |
+
|
| 200 |
+
mg_str = base64.b64encode(uploaded_file.getvalue()).decode("utf-8")
|
| 201 |
+
temp.write(base64.b64decode(mg_str))
|
| 202 |
+
|
| 203 |
+
run_prompt(prompt+ str(' ') + str(temp.name), image_path = temp.name )
|
| 204 |
+
|
| 205 |
+
if ss.get('file_type') == 'PDF (.pdf)':
|
| 206 |
+
with tempfile.NamedTemporaryFile( dir = 'j:/', suffix ='.pdf' ,delete=False) as f:
|
| 207 |
+
f.write(uploaded_file.read())
|
| 208 |
+
run_prompt(prompt, file_path = f.name)
|
| 209 |
+
f.close()
|
| 210 |
+
|
| 211 |
+
# with open("input.png","wb") as af:
|
| 212 |
+
# mg_str = base64.b64encode(files.getvalue()).decode("utf-8")
|
| 213 |
+
# af.write(base64.b64decode(mg_str))
|
| 214 |
+
|
| 215 |
+
# run_prompt(prompt.text+str(f.name), image_path =f.name )
|
| 216 |
+
# except:
|
| 217 |
+
# st.markdown("Please input correct files or query ")
|
| 218 |
+
else:
|
| 219 |
+
run_prompt(prompt)
|
prompts.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# flake8: noqa
|
| 2 |
+
PREFIX = """
|
| 3 |
+
You are an AI system called TeLLAgent and your task is to respond to the question or
|
| 4 |
+
solve the problem to the best of your ability using the provided tools.
|
| 5 |
+
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
FORMAT_INSTRUCTIONS = """
|
| 9 |
+
You can only respond with a single complete
|
| 10 |
+
"Thought, Action, Action Input" format
|
| 11 |
+
OR a single "Final Answer" format.
|
| 12 |
+
|
| 13 |
+
Complete format:
|
| 14 |
+
|
| 15 |
+
Thought: (reflect on your progress and decide what to do next)
|
| 16 |
+
Action: (the action name, should be one of [{tool_names}])
|
| 17 |
+
Action Input: (the input string to the action)
|
| 18 |
+
|
| 19 |
+
OR
|
| 20 |
+
|
| 21 |
+
Final Answer: (the final answer to the original input question)
|
| 22 |
+
"""
|
| 23 |
+
|
| 24 |
+
QUESTION_PROMPT1 = """
|
| 25 |
+
Give detailed step-by-step solution to answer the question below using the following tools:
|
| 26 |
+
Don't simplify the step description of the process.
|
| 27 |
+
|
| 28 |
+
{tool_strings}
|
| 29 |
+
|
| 30 |
+
Use the tools provided, using the most specific tool available for each action.
|
| 31 |
+
Your final answer should contain all information necessary to answer the question and subquestions.
|
| 32 |
+
|
| 33 |
+
IMPORTANT: Your first step is to check the following:
|
| 34 |
+
|
| 35 |
+
1. Does the question contain the name of the molecule, CAS, or molecular graph?
|
| 36 |
+
if so, as a first step, you should consider if it needs to convert the graph using graphconverter, name using Mol2SMILES or CAS number using Query2SMILES to SMILES.
|
| 37 |
+
|
| 38 |
+
2. Were you asked to predict the power conversion efficiency (PCE) ?
|
| 39 |
+
if so, you are only allowed to choose one of the following tools.
|
| 40 |
+
acceptor_predictor to predict the PCE of one acceptor molecule
|
| 41 |
+
donor_predictor to predict the PCE of one donor molecule
|
| 42 |
+
dap_predictor should be use when both the donor and acceptor molecule are offered
|
| 43 |
+
|
| 44 |
+
3. Is the question about image,figure,graph or paper files ?
|
| 45 |
+
if so, the papers or images have already been provided or referenced in some way.
|
| 46 |
+
you should use ImageAnalysis or pdfreader to solve the question.
|
| 47 |
+
Do not use other tools.
|
| 48 |
+
|
| 49 |
+
4. Were you ask to answer questions that require technical or general information ,
|
| 50 |
+
if so, you should combine the results from WebSearch, wikipedia and rag tool.
|
| 51 |
+
|
| 52 |
+
5. when you use the tool rag, you do not process the answer, return the results directly.
|
| 53 |
+
|
| 54 |
+
6.Do you need to work with images, you need to figure out the difference between the two tools Imageanalysis and graphconverter,
|
| 55 |
+
if you want to get SMILES of molecules choose graphconverter, if you want to analyze or read images use Imageanalysis.
|
| 56 |
+
|
| 57 |
+
Question: {input}
|
| 58 |
+
"""
|
| 59 |
+
|
| 60 |
+
QUESTION_PROMPT = """
|
| 61 |
+
Answer the question below using the following tools:
|
| 62 |
+
|
| 63 |
+
{tool_strings}
|
| 64 |
+
|
| 65 |
+
Use the tools provided, using the most specific tool available for each action.
|
| 66 |
+
Your final answer should contain all information necessary to answer the question and subquestions.
|
| 67 |
+
|
| 68 |
+
IMPORTANT: Your first step is to check the following:
|
| 69 |
+
1. Were you need to convert the molecular graph, name or CAS number to SMILES.
|
| 70 |
+
if so, as a first step, you should use graphconvertor, Query2SMILES(chemspace_api_key), Query2CAS() tools.
|
| 71 |
+
|
| 72 |
+
2. Were you asked to predict the power conversion efficiency (PCE) ?
|
| 73 |
+
if so, as a first step, you should consider if it needs to convert the graph, name or CAS number to SMILES.
|
| 74 |
+
Then, as a second step, you are only allowed to choose one of the following tools.
|
| 75 |
+
acceptor_predictor to predict the PCE of acceptor molecule
|
| 76 |
+
donor_predictor to predict the PCE of donor molecule
|
| 77 |
+
|
| 78 |
+
3. Were you ask to generate answer according to image or paper files,
|
| 79 |
+
if so, the papers or images have already been provided or referenced in some way.
|
| 80 |
+
you are only allowed to output the answer by using ImageAnalysis and pdfreader .
|
| 81 |
+
Do not use other tools.
|
| 82 |
+
|
| 83 |
+
4. Were you ask to answer questions that require technical or general inquiry about a term or concept,
|
| 84 |
+
if so, you should use rag tool first, then WebSearch, final wikipedia,
|
| 85 |
+
|
| 86 |
+
5. when you use the tool rag, you do not process the answer, return the results directly.
|
| 87 |
+
Question: {input}
|
| 88 |
+
"""
|
| 89 |
+
|
| 90 |
+
SUFFIX = """
|
| 91 |
+
|
| 92 |
+
Thought: {agent_scratchpad}
|
| 93 |
+
"""
|
| 94 |
+
FINAL_ANSWER_ACTION = "Final Answer:"
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
REPHRASE_TEMPLATE = """In this exercise you will assume the role of a scientific assistant named TeLLAgent. Your task is to answer the provided question as best as you can, based on the provided solution draft.
|
| 98 |
+
The solution draft follows the format "Thought, Action, Action Input, Observation", where the 'Thought' statements describe a reasoning sequence. The rest of the text is information obtained to complement the reasoning sequence, and it is 100% accurate.
|
| 99 |
+
Your task is to write an answer to the question based on the solution draft, and the following guidelines:
|
| 100 |
+
You need to be as detailed as possible in your answers to the questions and reduce the processing of the tool's output.
|
| 101 |
+
Direct output the results when using Imageanalysis, codewriter, pdfreader, rag tools without further processing.
|
| 102 |
+
|
| 103 |
+
The text should have an educative and assistant-like tone, be accurate, follow the same reasoning sequence than the solution draft and explain how any conclusion is reached.
|
| 104 |
+
Question: {question}
|
| 105 |
+
|
| 106 |
+
Solution draft: {agent_ans}
|
| 107 |
+
|
| 108 |
+
Answer:
|
| 109 |
+
"""
|
| 110 |
+
|
requirement.txt
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
paper-scraper @ git+https://github.com/blackadad/paper-scraper.git
|
| 2 |
+
numpy==1.26.4
|
| 3 |
+
rdkit==2023.9.53
|
| 4 |
+
streamlit
|
| 5 |
+
google-search-results==2.4.2
|
| 6 |
+
python-dotenv
|
| 7 |
+
wikipedia
|
| 8 |
+
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1
torchvision==0.19.1
torchaudio==2.4.1
|
| 9 |
+
pydantic==2.10.4
|
| 10 |
+
wandb
|
| 11 |
+
langchain==0.3.19
|
| 12 |
+
langchain-core==0.3.39
|
| 13 |
+
langchain-community==0.3.18
|
| 14 |
+
langchain-openai==0.3.1
|
| 15 |
+
typing-inspect==0.8.0
|
| 16 |
+
typing_extensions==4.12.2
|
| 17 |
+
paper-qa
|
| 18 |
+
pandas
|
| 19 |
+
molsets
|
| 20 |
+
scipy==1.15.2
|
| 21 |
+
selfies==2.2.0
|
| 22 |
+
browser-use
|
search.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_openai import ChatOpenAI
|
| 2 |
+
from browser_use import Agent
|
| 3 |
+
import asyncio
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
load_dotenv()
|
| 6 |
+
from langchain.tools import BaseTool
|
| 7 |
+
|
| 8 |
+
class SMILES2Weight(BaseTool):
    """Browser-driven web-search tool ("webuse").

    NOTE(review): the class name says SMILES2Weight but the tool performs a
    web search via browser-use; the name is kept unchanged so existing
    imports keep working.
    """

    name: str = "webuse"
    description: str = ("Calling the browser to search for information "
                        "input query, return the searching results")

    def __init__(
        self,
    ):
        super().__init__()

    def _run(self, task: str) -> str:
        """Synchronous entry point expected by LangChain tools.

        BUGFIX: the original declared ``_run`` as ``async`` (so the sync tool
        path would receive an unawaited coroutine) while ``_arun`` raised
        NotImplementedError; the sync/async pair is now the right way round.
        """
        return asyncio.run(self._arun(task))

    async def _arun(self, task: str) -> str:
        """Use the tool asynchronously: browse the web to answer *task*.

        BUGFIX: the original ignored *task* and always ran a hard-coded
        Reddit demo query, and embedded a live API key in source.
        """
        import os  # SECURITY FIX: key comes from the environment, not source
        agent = Agent(
            task=task,
            llm=ChatOpenAI(model="gpt-4o-2024-11-20",
                           api_key=os.getenv("OPENAI_API_KEY"),
                           base_url="https://www.dmxapi.com/v1"),
        )
        result = await agent.run()
        return result
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
|
streamlit_callback_handler.py
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Dict, List, Optional
|
| 2 |
+
|
| 3 |
+
from langchain.callbacks.streamlit.streamlit_callback_handler import (
|
| 4 |
+
|
| 5 |
+
LLMThought,
|
| 6 |
+
LLMThoughtLabeler,
|
| 7 |
+
LLMThoughtState,
|
| 8 |
+
StreamlitCallbackHandler,
|
| 9 |
+
ToolRecord,
|
| 10 |
+
)
|
| 11 |
+
from langchain_core.agents import AgentAction, AgentFinish
|
| 12 |
+
from streamlit.delta_generator import DeltaGenerator
|
| 13 |
+
|
| 14 |
+
from utils import is_smiles
|
| 15 |
+
|
| 16 |
+
import requests
|
| 17 |
+
from langchain import LLMChain, PromptTemplate
|
| 18 |
+
from langchain.chat_models import ChatOpenAI
|
| 19 |
+
from rdkit import Chem
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def cdk(smiles):
    """
    Get a depiction of some smiles.

    Calls the public CDK Depict web service and returns the response body
    (the SVG markup) as text.
    """
    # NOTE(review): no timeout or HTTP-error handling — a slow or failed
    # request propagates to the caller; confirm that is acceptable for the UI.
    url = "https://www.simolecule.com/cdkdepict/depict/wob/svg"
    headers = {"Content-Type": "application/json"}
    response = requests.get(
        url,
        headers=headers,
        params={
            "smi": smiles,          # the SMILES string to depict
            "annotate": "colmap",   # color-mapped atom annotation
            "zoom": 2,
            "w": 150,               # width in px
            "h": 80,                # height in px
            "abbr": "off",          # no structure abbreviations
        },
    )
    return response.text
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class LLMThoughtChem(LLMThought):
    """A Streamlit "thought" container with chemistry-aware tool rendering.

    Extends ``LLMThought`` to draw molecule depictions (via ``cdk``) when
    specific chemistry tools finish, and escapes ``[``/``]`` in labels so
    Streamlit markdown does not interpret SMILES brackets as link syntax.
    """

    def __init__(
        self,
        parent_container: DeltaGenerator,
        labeler: LLMThoughtLabeler,
        expanded: bool,
        collapse_on_complete: bool,
    ):
        super().__init__(
            parent_container,
            labeler,
            expanded,
            collapse_on_complete,
        )

    def on_tool_end(
        self,
        output: str,
        color: Optional[str] = None,
        observation_prefix: Optional[str] = None,
        llm_prefix: Optional[str] = None,
        output_ph: dict = {},  # NOTE(review): mutable default; unused in this body — confirm before removing
        input_tool: str = "",
        serialized: dict = {},  # NOTE(review): mutable default shared across calls
        **kwargs: Any,
    ) -> None:
        # Depending on the tool name, decide what to display.
        if serialized["name"] == "Name2SMILES":
            # Escape markdown-sensitive brackets before rendering the SMILES.
            safe_smiles = output.replace("[", "\[").replace("]", "\]")
            if is_smiles(output):
                # Show the SMILES string plus its SVG depiction inline.
                self._container.markdown(
                    f"**{safe_smiles}**{cdk(output)}", unsafe_allow_html=True
                )

        if serialized["name"] == "ReactionPredict":
            # Reaction SMILES: "reactants>>product".
            rxn = f"{input_tool}>>{output}"
            safe_smiles = rxn.replace("[", "\[").replace("]", "\]")
            self._container.markdown(
                f"**{safe_smiles}**{cdk(rxn)}", unsafe_allow_html=True
            )

        if serialized["name"] == "ReactionRetrosynthesis":
            # Only escapes the local variable; nothing is rendered for this
            # tool here (the long-runtime note is shown in on_tool_start).
            output = output.replace("[", "\[").replace("]", "\]")

    def on_tool_start(
        self, serialized: Dict[str, Any], input_str: str, **kwargs: Any
    ) -> None:
        # Called with the name of the tool we're about to run (in `serialized[name]`),
        # and its input. We change our container's label to be the tool name.
        self._state = LLMThoughtState.RUNNING_TOOL
        tool_name = serialized["name"]
        self._last_tool = ToolRecord(name=tool_name, input_str=input_str)
        self._container.update(
            new_label=(
                self._labeler.get_tool_label(self._last_tool, is_complete=False)
                .replace("[", "\[")
                .replace("]", "\]")
            )
        )

        # Display note of potential long time
        if serialized["name"] == "ReactionRetrosynthesis" or serialized["name"] == "LiteratureSearch":
            self._container.markdown(
                f"‼️ Note: This tool can take some time to complete execution ‼️",
                unsafe_allow_html=True,
            )

    def complete(self, final_label: Optional[str] = None) -> None:
        """Finish the thought."""
        if final_label is None and self._state == LLMThoughtState.RUNNING_TOOL:
            assert (
                self._last_tool is not None
            ), "_last_tool should never be null when _state == RUNNING_TOOL"
            final_label = self._labeler.get_tool_label(
                self._last_tool, is_complete=True
            )
        self._state = LLMThoughtState.COMPLETE

        # NOTE(review): if this is called with final_label=None while no tool
        # is running, final_label stays None and .replace raises — confirm
        # callers always pass a label in that situation.
        final_label = final_label.replace("[", "\[").replace("]", "\]")
        if self._collapse_on_complete:
            self._container.update(new_label=final_label, new_expanded=False)
        else:
            self._container.update(new_label=final_label)
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
class StreamlitCallbackHandlerChem(StreamlitCallbackHandler):
    """Streamlit callback handler that plugs in ``LLMThoughtChem``.

    Records each tool invocation's input string and serialized metadata at
    ``on_tool_start`` and forwards them to ``LLMThoughtChem.on_tool_end`` so
    chemistry tools can render molecule depictions.
    """

    def __init__(
        self,
        parent_container: DeltaGenerator,
        *,
        max_thought_containers: int = 4,
        expand_new_thoughts: bool = True,
        collapse_completed_thoughts: bool = True,
        thought_labeler: Optional[LLMThoughtLabeler] = None,
        output_placeholder: dict = {},
    ):
        """See ``StreamlitCallbackHandler`` for the inherited parameters.

        Args:
            output_placeholder: Extra mapping forwarded to ``on_tool_end`` as
                ``output_ph``.
        """
        super(StreamlitCallbackHandlerChem, self).__init__(
            parent_container,
            max_thought_containers=max_thought_containers,
            expand_new_thoughts=expand_new_thoughts,
            collapse_completed_thoughts=collapse_completed_thoughts,
            thought_labeler=thought_labeler,
        )

        self._output_placeholder = output_placeholder
        # BUGFIX: the original initialized ``self.last_input`` (never read)
        # while ``on_tool_end`` reads ``self._last_input``/``self._serialized``,
        # which previously existed only after ``on_tool_start`` ran.
        # Initialize the attributes that are actually consumed so an
        # out-of-order callback cannot raise AttributeError; ``last_input``
        # is kept for backward compatibility.
        self._last_input = ""
        self._serialized = {}
        self.last_input = ""

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        """Open a chemistry-aware thought container on the first LLM call."""
        if self._current_thought is None:
            self._current_thought = LLMThoughtChem(
                parent_container=self._parent_container,
                expanded=self._expand_new_thoughts,
                collapse_on_complete=self._collapse_completed_thoughts,
                labeler=self._thought_labeler,
            )

        self._current_thought.on_llm_start(serialized, prompts)

        # We don't prune_old_thought_containers here, because our container won't
        # be visible until it has a child.

    def on_tool_start(
        self, serialized: Dict[str, Any], input_str: str, **kwargs: Any
    ) -> None:
        """Record tool metadata so ``on_tool_end`` can render tool-specific UI."""
        self._require_current_thought().on_tool_start(serialized, input_str, **kwargs)
        self._prune_old_thought_containers()
        self._last_input = input_str
        self._serialized = serialized

    def on_tool_end(
        self,
        output: str,
        color: Optional[str] = None,
        observation_prefix: Optional[str] = None,
        llm_prefix: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Forward the tool result plus the recorded metadata to the thought."""
        self._require_current_thought().on_tool_end(
            output,
            color,
            observation_prefix,
            llm_prefix,
            output_ph=self._output_placeholder,
            input_tool=self._last_input,
            serialized=self._serialized,
            **kwargs,
        )
        self._complete_current_thought()

    def on_agent_finish(
        self, finish: AgentFinish, color: Optional[str] = None, **kwargs: Any
    ) -> None:
        """Close the current thought, escaping markdown-sensitive brackets."""
        if self._current_thought is not None:
            self._current_thought.complete(
                self._thought_labeler.get_final_agent_thought_label()
                .replace("[", "\[")
                .replace("]", "\]")
            )
            self._current_thought = None
|
test.py
ADDED
|
@@ -0,0 +1,262 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""
|
| 3 |
+
Created on Tue Mar 25 16:09:34 2025
|
| 4 |
+
|
| 5 |
+
@author: BM109X32G-10GPU-02
|
| 6 |
+
"""
|
| 7 |
+
import os
|
| 8 |
+
import sys
|
| 9 |
+
import asyncio
|
| 10 |
+
import streamlit as st
|
| 11 |
+
from dotenv import load_dotenv
|
| 12 |
+
|
| 13 |
+
# Ensure local repository (browser_use) is accessible
|
| 14 |
+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 15 |
+
|
| 16 |
+
from browser_use import Agent
|
| 17 |
+
from browser_use.browser.browser import Browser, BrowserConfig
|
| 18 |
+
from browser_use.controller.service import Controller
|
| 19 |
+
from langchain_openai import ChatOpenAI
|
| 20 |
+
# Load environment variables
|
| 21 |
+
load_dotenv()
|
| 22 |
+
|
| 23 |
+
if os.name == 'nt':
|
| 24 |
+
asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
|
| 25 |
+
|
| 26 |
+
# Function to get the LLM based on provider
|
| 27 |
+
def get_llm(provider: str):
    """Return a chat model for the given provider.

    Args:
        provider: either ``'openai'`` or ``'anthropic'``.

    Returns:
        A ``ChatOpenAI`` instance pointed at the dmxapi proxy endpoint.

    Stops the Streamlit script (``st.stop()``) when the key is missing or the
    provider is unsupported.

    SECURITY FIX: the API key was previously hard-coded in source; it is now
    read from the environment (populated by ``load_dotenv()`` at module top).
    """
    if provider == 'anthropic':
        api_key = os.getenv("ANTHROPIC_API_KEY")
        if not api_key:
            st.error("Error: ANTHROPIC_API_KEY is not set. Please provide a valid API key.")
            st.stop()

        return ChatOpenAI(openai_api_key=api_key, base_url="https://www.dmxapi.com/v1/")
    elif provider == 'openai':
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            st.error("Error: OPENAI_API_KEY is not set. Please provide a valid API key.")
            st.stop()

        return ChatOpenAI(openai_api_key=api_key, base_url="https://www.dmxapi.com/v1/")
    else:
        st.error(f'Unsupported provider: {provider}')
        st.stop()
|
| 47 |
+
|
| 48 |
+
# Function to initialize the agent
|
| 49 |
+
def initialize_agent(query: str, provider: str):
    """Build a browser-use Agent for *query* and return ``(agent, browser)``.

    The browser is returned alongside the agent so the caller can close it
    once the run is finished.
    """
    language_model = get_llm(provider)
    automation_browser = Browser(config=BrowserConfig())
    agent = Agent(
        task=query,
        llm=language_model,
        controller=Controller(),
        browser=automation_browser,
        use_vision=True,
        max_actions_per_step=1,
    )
    return agent, automation_browser
|
| 62 |
+
|
| 63 |
+
# Streamlit UI
|
| 64 |
+
# --- Streamlit UI ---------------------------------------------------------
st.title("Automated Browser Agent with LLMs 🤖")
# Demo button: store "Foo" in session state and rerun the script so the
# st.header at the bottom (which renders st.session_state.value) updates.
if st.button("Foo"):
    st.session_state.value = "Foo"
    st.rerun()
query = st.text_input("Enter your query:", "go to reddit and search for posts about browser-use")
provider = st.radio("Select LLM Provider:", ["openai", "anthropic"], index=0)

if st.button("Run Agent"):
    st.write("Initializing agent...")
    agent, browser = initialize_agent(query, provider)

    async def run_agent():
        # Run the browser automation inside a spinner; max_steps caps the
        # number of agent iterations.
        with st.spinner("Running automation..."):
            await agent.run(max_steps=25)
        st.success("Task completed! 🎉")

    asyncio.run(run_agent())

    # NOTE(review): `browser` exists only inside this branch, so the close
    # button renders only right after a run — confirm this is intended.
    st.button("Close Browser", on_click=lambda: asyncio.run(browser.close()))
# NOTE(review): streamlit was already imported at the top of the file; this
# re-import is redundant but harmless.
import streamlit as st

# Seed the session-state value shown in the header on the first run; the
# "Foo" button above overwrites it on a rerun.
if "value" not in st.session_state:
    st.session_state.value = "Title"

##### Option using st.rerun #####
st.header(st.session_state.value)
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
# from langchain.chains import LLMChain, SimpleSequentialChain, RetrievalQA, ConversationalRetrievalChain
|
| 93 |
+
|
| 94 |
+
# from langchain import PromptTemplate
|
| 95 |
+
|
| 96 |
+
# from langchain.tools import BaseTool
|
| 97 |
+
|
| 98 |
+
# from langchain_core.messages import HumanMessage, SystemMessage
|
| 99 |
+
# from langchain.base_language import BaseLanguageModel
|
| 100 |
+
# from langchain.text_splitter import CharacterTextSplitter
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
# from langchain_community.document_loaders import PyPDFLoader
|
| 104 |
+
# from langchain_community.vectorstores import FAISS
|
| 105 |
+
# from langchain_openai import ChatOpenAI
|
| 106 |
+
# from langchain_openai import OpenAIEmbeddings
|
| 107 |
+
|
| 108 |
+
# from langchain_community.document_loaders import PyPDFLoader
|
| 109 |
+
# import streamlit as st
|
| 110 |
+
# import pandas as pd
|
| 111 |
+
# import base64
|
| 112 |
+
|
| 113 |
+
# from langchain_community.embeddings import OllamaEmbeddings
|
| 114 |
+
# from langchain.tools import BaseTool
|
| 115 |
+
# from langchain_openai import ChatOpenAI
|
| 116 |
+
# from langchain_core.messages import HumanMessage, SystemMessage
|
| 117 |
+
# from langchain.base_language import BaseLanguageModel
|
| 118 |
+
# import base64
|
| 119 |
+
# from io import BytesIO
|
| 120 |
+
# from PIL import Image
|
| 121 |
+
# from langchain_community.embeddings import OllamaEmbeddings
|
| 122 |
+
# from langchain.tools import BaseTool
|
| 123 |
+
# from langchain_openai import ChatOpenAI
|
| 124 |
+
# from langchain_core.messages import HumanMessage, SystemMessage
|
| 125 |
+
# from langchain.base_language import BaseLanguageModel
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
# from tempfile import NamedTemporaryFile
|
| 129 |
+
# import streamlit as st
|
| 130 |
+
|
| 131 |
+
# uploaded_file = st.file_uploader("File upload")
|
| 132 |
+
|
| 133 |
+
# def convert_to_base64(pil_image):
|
| 134 |
+
# buffered = BytesIO()
|
| 135 |
+
# pil_image.save(buffered, format="PNG")
|
| 136 |
+
# img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
|
| 137 |
+
# return img_str
|
| 138 |
+
|
| 139 |
+
# prompt = st.chat_input(
|
| 140 |
+
# "Say something and/or attach an image",
|
| 141 |
+
# accept_file=True,
|
| 142 |
+
|
| 143 |
+
# )
|
| 144 |
+
# uploaded_file = st.file_uploader("Choose a file")
|
| 145 |
+
# template = """
|
| 146 |
+
|
| 147 |
+
# You are an expert chemist and your task is to respond to the question or
|
| 148 |
+
# solve the problem to the best of your ability. You need to answer in as much detail as possible.
|
| 149 |
+
# You can only respond with a single "Final Answer" format.
|
| 150 |
+
# Use the following pieces of context to answer the question at the end.
|
| 151 |
+
# If you don't know the answer, just say that you don't know, don't try to make up an answer.
|
| 152 |
+
# <context>
|
| 153 |
+
# {context}
|
| 154 |
+
# </context>
|
| 155 |
+
|
| 156 |
+
# Question: {question}
|
| 157 |
+
# Answer:
|
| 158 |
+
|
| 159 |
+
# """
|
| 160 |
+
|
| 161 |
+
# import tempfile
|
| 162 |
+
# import base64
|
| 163 |
+
# from io import BytesIO
|
| 164 |
+
# from PIL import Image
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
# if uploaded_file is None:
|
| 168 |
+
# st.markdown(prompt)
|
| 169 |
+
# if uploaded_file is not None:
|
| 170 |
+
# # try:
|
| 171 |
+
|
| 172 |
+
# # loader = PyPDFLoader(uploaded_file)
|
| 173 |
+
# # documents = loader.load()
|
| 174 |
+
# # st.markdown(prompt)
|
| 175 |
+
# # except:
|
| 176 |
+
# # #try:
|
| 177 |
+
|
| 178 |
+
# # file = pd.read_csv(uploaded_file)
|
| 179 |
+
# # st.markdown(prompt)
|
| 180 |
+
# #
|
| 181 |
+
# # with tempfile.NamedTemporaryFile( dir = 'j:/', suffix ='.png' ,delete=False) as f:
|
| 182 |
+
# with tempfile.NamedTemporaryFile(dir = 'j:/',delete=False, suffix=".png") as temp_file:
|
| 183 |
+
|
| 184 |
+
# mg_str = base64.b64encode(uploaded_file.getvalue()).decode("utf-8")
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
# temp_file.write(base64.b64decode(mg_str))
|
| 188 |
+
|
| 189 |
+
# # image = Image.new('RGB', (100, 100), color='blue')
|
| 190 |
+
# # image.save(uploaded_file.getvalue(), format='PNG')
|
| 191 |
+
# # loaded_image = Image.open(f.name)
|
| 192 |
+
|
| 193 |
+
# pil_image = Image.open(temp_file.name)
|
| 194 |
+
# rgb_im = pil_image.convert('RGB')
|
| 195 |
+
# image_b64 = convert_to_base64(pil_image)
|
| 196 |
+
# query = 'what can you see in the image'
|
| 197 |
+
# llm = ChatOpenAI(model="gpt-4o-2024-11-20",api_key='sk-itPrztYm9F6XZZpsBMJB9O7Vq0pYUABVVBSoThuBxEGTnDik',
|
| 198 |
+
# base_url="https://www.dmxapi.com/v1")
|
| 199 |
+
# message = HumanMessage(
|
| 200 |
+
# content=[
|
| 201 |
+
# {"type": "text", "text": query},
|
| 202 |
+
# {
|
| 203 |
+
# "type": "image_url",
|
| 204 |
+
# "image_url": {"url":f"data:image/jpeg;base64,{image_b64}"},
|
| 205 |
+
# },
|
| 206 |
+
# ],)
|
| 207 |
+
|
| 208 |
+
# response = llm.invoke([message])
|
| 209 |
+
# st.markdown(response.content)
|
| 210 |
+
|
| 211 |
+
# # mg_str = base64.b64encode(files.getvalue()).decode("utf-8")
|
| 212 |
+
|
| 213 |
+
# # img_str = base64.b64encode(files.getvalue()).decode("utf-8")
|
| 214 |
+
# # # image_b64 = convert_to_base64(files.getvalue())
|
| 215 |
+
|
| 216 |
+
# #
|
| 217 |
+
# # st.markdown(query)
|
| 218 |
+
# # st.markdown(response.content)
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
# with open("input.pdf","wb") as f:
|
| 237 |
+
# base64_pdf = base64.b64encode(prompt["files"][0].read()).decode('utf-8')
|
| 238 |
+
# f.write(base64.b64decode(base64_pdf))
|
| 239 |
+
# loader = PyPDFLoader(f.name)
|
| 240 |
+
# documents = loader.load()
|
| 241 |
+
|
| 242 |
+
# text_splitter = CharacterTextSplitter(chunk_size=6000, chunk_overlap=1000)
|
| 243 |
+
# docs = text_splitter.split_documents(documents)
|
| 244 |
+
# embeddings = OpenAIEmbeddings(model="text-embedding-3-large",api_key='sk-itPrztYm9F6XZZpsBMJB9O7Vq0pYUABVVBSoThuBxEGTnDik',
|
| 245 |
+
# base_url="https://www.dmxapi.com/v1")
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
# vectorstore = FAISS.from_documents(docs, embeddings)
|
| 249 |
+
# prompt = PromptTemplate(template=template, input_variables=[ "question"])
|
| 250 |
+
# qa_chain = RetrievalQA.from_chain_type(
|
| 251 |
+
# llm= ChatOpenAI(model="gpt-4o-2024-11-20",api_key='sk-itPrztYm9F6XZZpsBMJB9O7Vq0pYUABVVBSoThuBxEGTnDik',
|
| 252 |
+
# base_url="https://www.dmxapi.com/v1"),
|
| 253 |
+
# chain_type="stuff",
|
| 254 |
+
# retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
|
| 255 |
+
# return_source_documents=True,
|
| 256 |
+
# chain_type_kwargs={"prompt": prompt},
|
| 257 |
+
# )
|
| 258 |
+
|
| 259 |
+
# result = qa_chain.invoke('what is the main point')
|
| 260 |
+
# st.markdown(result['result'])
|
| 261 |
+
# a = pd.read_csv(prompt["files"][0] )
|
| 262 |
+
# st.markdown(a)
|
tools.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
from langchain_community.agent_toolkits.load_tools import load_tools
|
| 4 |
+
from langchain.base_language import BaseLanguageModel
|
| 5 |
+
|
| 6 |
+
from tool import *
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def make_tools(llm: BaseLanguageModel, api_keys: dict = None, verbose=True, image_path=None, file_path=None):
    """Assemble the list of tools available to the agent.

    Args:
        llm: language model passed to tools that need one (Scholar2ResultLLM).
        api_keys: optional mapping of API keys; any key not supplied falls
            back to the corresponding environment variable.
        verbose: kept for interface compatibility (not used here).
        image_path: when given, an image-analysis tool is appended.
        file_path: when given, a PDF-reader tool is appended.

    Returns:
        List of instantiated tool objects.
    """
    if api_keys is None:
        # BUG FIX: ``api_keys: dict = {}`` was a shared mutable default.
        api_keys = {}
    # SECURITY FIX: a hard-coded SerpAPI key previously overwrote this
    # lookup; keys now come only from the caller or the environment.
    serp_api_key = api_keys.get("SERP_API_KEY") or os.getenv("SERP_API_KEY")
    openai_api_key = api_keys.get("OPENAI_API_KEY") or os.getenv("OPENAI_API_KEY")
    chemspace_api_key = api_keys.get("CHEMSPACE_API_KEY") or os.getenv(
        "CHEMSPACE_API_KEY"
    )
    semantic_scholar_api_key = api_keys.get("SEMANTIC_SCHOLAR_API_KEY") or os.getenv(
        "SEMANTIC_SCHOLAR_API_KEY"
    )

    all_tools = load_tools(
        [
            # "python_repl",
            # "ddg-search",
            "wikipedia",
            # "human"
        ]
    )

    all_tools += [
        browseruse(),
        rag(),
        codewriter(),
        Query2SMILES(chemspace_api_key),
        Mol2SMILES(chemspace_api_key),
        Query2CAS(),
        SMILES2Name(),
        SMILES2SAScore(),
        SMILES2LogP(),
        SMILES2Properties(),
        MolSimilarity(),
        SMILES2Weight(),
        FuncGroups(),
        donor_predictor(),
        acceptor_predictor(),
        homolumo_predictor(),
        dap_screen(),
        graphconverter(),
        molgen(),
        dap_predictor(),
        Scholar2ResultLLM(
            llm=llm,
            openai_api_key=openai_api_key,
            semantic_scholar_api_key=semantic_scholar_api_key,
        ),
    ]

    # Optional tools, added only when their prerequisites are available.
    if serp_api_key:
        all_tools += [WebSearch(serp_api_key)]
    if image_path is not None:
        all_tools += [Imageanalysis(image_path)]
    if file_path is not None:
        all_tools += [pdfreader(file_path)]

    return all_tools
|
utils.py
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
import requests
|
| 4 |
+
from rdkit import Chem, DataStructs
|
| 5 |
+
from rdkit.Chem import AllChem
|
| 6 |
+
|
| 7 |
+
from urllib.request import urlopen
|
| 8 |
+
from urllib.parse import quote
|
| 9 |
+
|
| 10 |
+
def is_smiles(text):
    """Return True if *text* parses as a SMILES string.

    Parsing is done without sanitization, so chemically dubious but
    syntactically valid strings are accepted.
    """
    try:
        m = Chem.MolFromSmiles(text, sanitize=False)
        return m is not None
    # BUG FIX: was a bare ``except:`` which would also swallow
    # KeyboardInterrupt/SystemExit; catch ordinary errors only.
    except Exception:
        return False
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def is_multiple_smiles(text):
    """Return True when *text* is valid SMILES made of several '.'-joined parts."""
    return is_smiles(text) and "." in text
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def split_smiles(text):
    """Split a multi-molecule SMILES string into its '.'-separated fragments."""
    separator = "."
    return text.split(separator)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def is_cas(text):
    """Return True if *text* matches the CAS registry number format
    (2-7 digits, 2 digits, 1 check digit, hyphen-separated)."""
    cas_pattern = re.compile(r"^\d{2,7}-\d{2}-\d$")
    return bool(cas_pattern.match(text))
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def largest_mol(smiles):
    """Return the longest valid SMILES fragment of a '.'-separated string.

    Fragments are sorted by length; invalid ones are discarded from the long
    end until a valid fragment is found.

    Raises:
        ValueError: if no fragment is valid SMILES (the original
        implementation raised a bare IndexError here).
    """
    fragments = sorted(smiles.split("."), key=len)
    while fragments and not is_smiles(fragments[-1]):
        fragments.pop()
    # BUG FIX: guard against every fragment being invalid, which previously
    # crashed with IndexError on ss[-1].
    if not fragments:
        raise ValueError(f"No valid SMILES fragment found in {smiles!r}")
    return fragments[-1]
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def canonical_smiles(smiles):
    """Return the canonical form of *smiles*, or an error message string."""
    try:
        mol = Chem.MolFromSmiles(smiles)
        return Chem.MolToSmiles(mol, canonical=True)
    except Exception:
        return "Invalid SMILES string"
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def tanimoto(s1, s2):
    """Calculate the Tanimoto similarity of two SMILES strings."""
    try:
        # 2048-bit Morgan (radius 2) fingerprints for both inputs.
        fp_a, fp_b = (
            AllChem.GetMorganFingerprintAsBitVect(Chem.MolFromSmiles(smi), 2, nBits=2048)
            for smi in (s1, s2)
        )
        return DataStructs.TanimotoSimilarity(fp_a, fp_b)
    except (TypeError, ValueError, AttributeError):
        return "Error: Not a valid SMILES string"
|
| 62 |
+
|
| 63 |
+
def CIRconvert(ids):
    """Resolve a chemical identifier to SMILES via the NCI CACTUS web service.

    Network I/O: raises the usual urllib errors when the service is
    unreachable or the identifier is unknown.
    """
    endpoint = 'http://cactus.nci.nih.gov/chemical/structure/' + quote(ids) + '/smiles'
    response = urlopen(endpoint)
    return response.read().decode('utf8')
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def pubchem_query2smiles(
    query: str,
    url: str = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/{}",
) -> str:
    """Resolve a molecule name to canonical SMILES via PubChem.

    If *query* already is a single SMILES string it is returned as-is;
    multiple '.'-separated SMILES are rejected. On a PubChem miss the NCI
    CACTUS resolver (CIRconvert) is tried as a fallback.

    Args:
        query: molecule name or SMILES string.
        url: PubChem PUG-REST URL template with two ``{}`` placeholders.

    Returns:
        Canonical SMILES of the largest fragment, or an error message string
        when no match is found.

    Raises:
        ValueError: if *query* contains multiple SMILES strings.
    """
    if is_smiles(query):
        if not is_multiple_smiles(query):
            return query
        else:
            raise ValueError(
                "Multiple SMILES strings detected, input one molecule at a time."
            )
    if url is None:
        url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/{}"
    r = requests.get(url.format(query, "property/IsomericSMILES/JSON"))
    # convert the response to a json object
    data = r.json()
    # return the SMILES string
    try:
        smi = data["PropertyTable"]["Properties"][0]["IsomericSMILES"]
    # BUG FIX: was a bare ``except:``; name the lookup failures explicitly.
    except (KeyError, IndexError, TypeError):
        try:
            smi = CIRconvert(query)
        # BUG FIX: was ``except KeyError``, but urlopen inside CIRconvert
        # raises HTTPError/URLError on failure, which previously escaped.
        except Exception:
            return "Could not find a molecule matching the text. One possible cause is that the input is incorrect, input one molecule at a time."
    return str(Chem.CanonSmiles(largest_mol(smi)))
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def query2cas(query: str, url_cid: str, url_data: str):
    """Look up the CAS registry number for a molecule via PubChem (network I/O).

    Args:
        query: molecule name or single SMILES string.
        url_cid: URL template with two ``{}`` placeholders (mode, query) that
            returns a PubChem CID list.
        url_data: URL template with one ``{}`` placeholder (CID) that returns
            the full PubChem record as JSON.

    Returns:
        The first CAS number string found in the record.

    Raises:
        ValueError: on multiple SMILES input, on any PubChem lookup failure,
            or when the record contains no CAS entry.
    """
    try:
        # Choose the PUG-REST lookup mode from the input type.
        mode = "name"
        if is_smiles(query):
            if is_multiple_smiles(query):
                raise ValueError(
                    "Multiple SMILES strings detected, input one molecule at a time."
                )
            mode = "smiles"
        url_cid = url_cid.format(mode, query)
        # First request: resolve the query to a PubChem compound ID (CID).
        cid = requests.get(url_cid).json()["IdentifierList"]["CID"][0]
        url_data = url_data.format(cid)
        # Second request: fetch the full compound record for that CID.
        data = requests.get(url_data).json()
    except (requests.exceptions.RequestException, KeyError):
        raise ValueError("Invalid molecule input, no Pubchem entry")

    # Walk the record hierarchy:
    # Names and Identifiers -> Other Identifiers -> CAS -> first string value.
    try:
        for section in data["Record"]["Section"]:
            if section.get("TOCHeading") == "Names and Identifiers":
                for subsection in section["Section"]:
                    if subsection.get("TOCHeading") == "Other Identifiers":
                        for subsubsection in subsection["Section"]:
                            if subsubsection.get("TOCHeading") == "CAS":
                                return subsubsection["Information"][0]["Value"][
                                    "StringWithMarkup"
                                ][0]["String"]
    except KeyError:
        raise ValueError("Invalid molecule input, no Pubchem entry")

    raise ValueError("CAS number not found")
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
def smiles2name(smi, single_name=True):
    """This function queries the given molecule smiles and returns a name record or iupac"""

    # Canonicalize first so the PubChem URL gets a normalized SMILES;
    # invalid input fails fast here instead of at the HTTP request.
    try:
        smi = Chem.MolToSmiles(Chem.MolFromSmiles(smi), canonical=True)
    except Exception:
        raise ValueError("Invalid SMILES string")
    # query the PubChem database
    # NOTE(review): `smi` is embedded unescaped in the URL path — SMILES
    # containing '/', '#' or '%' may need percent-encoding; confirm.
    r = requests.get(
        "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/"
        + smi
        + "/synonyms/JSON"
    )
    # convert the response to a json object
    data = r.json()
    # return the SMILES string
    try:
        if single_name:
            index = 0
            names = data["InformationList"]["Information"][0]["Synonym"]
            # Skip leading synonyms that are CAS numbers; if every synonym
            # is a CAS number, give up.
            while is_cas(name := names[index]):
                index += 1
                if index == len(names):
                    raise ValueError("No name found")
        else:
            # single_name=False: return the full synonym list instead.
            name = data["InformationList"]["Information"][0]["Synonym"]
    except KeyError:
        raise ValueError("Unknown Molecule")
    return name
|