Spaces:
Build error
Build error
File size: 21,395 Bytes
2d883ce 1c23f96 2d883ce ad0d142 fba478d 2d883ce 1c23f96 2d883ce fba478d 2d883ce 1c23f96 2d883ce 1c23f96 2d883ce 1c23f96 2d883ce 1c23f96 2d883ce 1c23f96 2d883ce 1c23f96 2d883ce 1c23f96 2d883ce 1c23f96 2d883ce 1c23f96 2d883ce eb0bd77 1c23f96 eb0bd77 1c23f96 eb0bd77 1c23f96 eb0bd77 2d883ce 1c23f96 2d883ce 1c23f96 2d883ce 1c23f96 2d883ce 1c23f96 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 |
from groq import Groq
import groq
import streamlit as st
from openai import OpenAI
import json
import streamlit.components.v1 as components
import requests
from youtube_transcript_api import YouTubeTranscriptApi
from youtubesearchpython import VideosSearch
from rdkit import Chem
from rdkit.Chem import Draw, AllChem
import os
import queue
import re
import tempfile
import threading
import requests
from bs4 import BeautifulSoup
from embedchain import App
from embedchain.config import BaseLlmConfig
from embedchain.helpers.callbacks import (StreamingStdOutCallbackHandlerYield,
generate)
# API clients. The keys are read from the GROQ_API and OPENAI_API environment
# variables; a missing variable yields None for the corresponding api_key.
client_groq = Groq(api_key=os.environ.get('GROQ_API'))
client_openai = OpenAI(api_key=os.environ.get('OPENAI_API'))
# Function schema with a single string argument, ``link``, described as the
# website url.
link_custom_functions = [
    {
        "name": "extract_website_url",
        "description": "Get the website url",
        "parameters": {
            "type": "object",
            "properties": {
                "link": {"type": "string", "description": "website url"},
            },
        },
    },
]
def embedchain_bot(db_path, api_key):
    """Create an embedchain ``App`` from an inline configuration.

    Args:
        db_path: Directory passed as ``dir`` to the chroma vector database.
        api_key: Key passed to both the OpenAI LLM and the OpenAI embedder.

    Returns:
        Whatever ``App.from_config`` returns for the assembled config.
    """
    llm_section = {
        "provider": "openai",
        "config": {
            "model": "gpt-3.5-turbo-1106",
            "temperature": 0.5,
            "max_tokens": 1000,
            "top_p": 1,
            "stream": True,
            "api_key": api_key,
        },
    }
    vectordb_section = {
        "provider": "chroma",
        "config": {"collection_name": "chat-pdf", "dir": db_path, "allow_reset": True},
    }
    embedder_section = {"provider": "openai", "config": {"api_key": api_key}}
    chunker_section = {"chunk_size": 2000, "chunk_overlap": 0, "length_function": "len"}

    return App.from_config(
        config={
            "llm": llm_section,
            "vectordb": vectordb_section,
            "embedder": embedder_section,
            "chunker": chunker_section,
        }
    )
def get_db_path():
    """Create a fresh temporary directory and return its path."""
    return tempfile.mkdtemp()
def get_ec_app(api_key):
    """Return the embedchain app stored in ``st.session_state``, creating it if absent.

    Args:
        api_key: Key forwarded to ``embedchain_bot`` when a new app is built.

    Returns:
        The app found under ``st.session_state.app``, or the newly created one
        (which is also stored there).
    """
    if "app" in st.session_state:
        print("Found app in session state")
        return st.session_state.app

    print("Creating app")
    fresh_app = embedchain_bot(get_db_path(), api_key)
    st.session_state.app = fresh_app
    return fresh_app
def groq_response(content, prompt):
    """Send ``content + prompt`` to Groq as one user message and return the reply text.

    Args:
        content: Instruction text placed in front of the prompt.
        prompt: Text appended to ``content``. ``None`` (for example the result
            of an earlier failed call being chained into this one) is treated
            as "nothing to send".

    Returns:
        The message content of the first completion choice, or ``None`` when
        ``prompt`` is ``None`` or the request fails. On a failed request an
        error is shown with ``st.error``.
    """
    if prompt is None:
        # Concatenating None would raise TypeError; the failure that produced
        # the None has already been reported to the user.
        return None

    try:
        response = client_groq.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": content + prompt,
                }
            ],
            model="mixtral-8x7b-32768",
        )
    except groq.APIConnectionError:
        st.error("The server could not be reached, please try again later.")
    except groq.RateLimitError:
        st.error("You have exceeded the rate limit for the demo version, please try again in some time.")
    except groq.APIStatusError as e:
        # Must come after RateLimitError, which is one of its subclasses.
        st.error(f"The language model request failed (status {e.status_code}), please try again later.")
    else:
        return response.choices[0].message.content
    return None
# API clients, keyed from the OPENAI_API / GROQ_API environment variables.
# NOTE(review): client_openai and client_groq are already constructed earlier
# in this file; these assignments rebind the same names to new instances.
client_openai = OpenAI(api_key=os.environ.get('OPENAI_API'))
client_groq = Groq(api_key=os.environ.get('GROQ_API'))
client_groq_one = Groq(api_key=os.environ.get('GROQ_API'))
# Custom function schemas for OpenAI function calling.
# This one exposes four string arguments, scenario_1 .. scenario_4.
scenario_custom_functions = [
    {
        'name': 'extract_scenario_info',
        'description': 'Get the individual scenarios text',
        'parameters': {
            'type': 'object',
            'properties': {
                f'scenario_{number}': {
                    'type': 'string',
                    'description': f'scenario number {number} full text',
                }
                for number in range(1, 5)
            },
        },
    },
]
# Function schema exposing four string arguments, keyword_1 .. keyword_4.
# NOTE(review): the name and description are identical to the scenario schema
# ('extract_scenario_info' / 'Get the individual scenarios text') although the
# arguments are keywords - looks like a copy-paste; confirm before changing.
scenario_keyword_functions = [
    {
        'name': 'extract_scenario_info',
        'description': 'Get the individual scenarios text',
        'parameters': {
            'type': 'object',
            'properties': {
                f'keyword_{number}': {
                    'type': 'string',
                    'description': f'keyword {number}',
                }
                for number in range(1, 5)
            },
        },
    },
]
# Function schema with a single string argument, ``video_id``.
video_custom_functions = [
    {
        "name": "extract_video_id",
        "description": "Get the video ID",
        "parameters": {
            "type": "object",
            "properties": {
                "video_id": {"type": "string", "description": "video ID"},
            },
        },
    },
]

# Accumulator string for the transcripts of all fetched videos; starts empty.
all_video_transcripts = str()
# Function schema with a single string argument, ``molecule_name``.
molecule_custom_functions = [
    {
        "name": "extract_molecule_info",
        "description": "Get the molecule name",
        "parameters": {
            "type": "object",
            "properties": {
                "molecule_name": {"type": "string", "description": "name of the molecule"},
            },
        },
    },
]
# Function schema with a single string argument, ``keyword`` (the search query
# keyword).
keyword_custom_functions = [
    {
        "name": "extract_keyword_info",
        "description": "Get the search query keyword",
        "parameters": {
            "type": "object",
            "properties": {
                "keyword": {"type": "string", "description": "keyword of teh search query"},
            },
        },
    },
]
# Placeholder SMILES strings for each reaction component. They are meant to be
# replaced by the values retrieved from the API calls.
reactant_1_smiles = "your_reactant_1_smiles_here"
# The second reactant might be an empty string if not present.
reactant_2_smiles = "your_reactant_2_smiles_here"
reagent_3_smiles = "your_reagent_3_smiles_here"
product_4_smiles = "your_product_4_smiles_here"
product_5_smiles = "your_product_5_smiles_here"

# Function schema naming the components of a reaction: two reactants, one
# reagent and two products, all strings.
molecule_custom_functions_reaction = [
    {
        "name": "extract_molecules_info",
        "description": "Get the name of the individual molecules",
        "parameters": {
            "type": "object",
            "properties": {
                "reactant_1": {"type": "string", "description": "reactant number 1 "},
                "reactant_2": {"type": "string", "description": "reactant number 2 "},
                "reagent_3": {"type": "string", "description": "reagent number 1 "},
                "product_4": {"type": "string", "description": "product number 1"},
                "product_5": {"type": "string", "description": "product number 2"},
            },
        },
    },
]
# Streamlit UI: page title and the main-page inputs.
st.title("Stereo World Updated π")

# Holds the PNG bytes of a drawn reaction once one has been generated.
image_variable = None

# Give each session-state key a default value the first time the script runs.
for _state_key, _state_default in (
    ('prompt', ''),
    ('selected_options', []),
    ('selected_options_reaction', []),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# User inputs; the widget values overwrite the session-state entries.
st.session_state.selected_options = st.multiselect(
    "Select options",
    ["fun based", "context based", "real world based", "conceptual textbook based"],
)
st.session_state.prompt = st.text_input("Enter your prompt")
check_box = st.checkbox("Open Chem Sketcher")
def _pubchem_lookup(name, path):
    """Look a compound up by name on PubChem PUG REST and return the first TXT value.

    Args:
        name: Compound name; an empty or missing name is not looked up.
        path: Operation part of the URL, e.g. ``'property/CanonicalSMILES'``
            or ``'cids'``.

    Returns:
        The first whitespace-separated token of the response body, or ``''``
        when the name is empty, the request fails, or the status is not 200.
    """
    from urllib.parse import quote  # local import keeps this block self-contained

    if not name:
        return ''
    url = (
        "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/"
        f"{quote(name, safe='')}/{path}/TXT"
    )
    try:
        reply = requests.get(url, timeout=10)
    except requests.RequestException:
        return ''
    if reply.status_code != 200:
        return ''
    # The TXT body ends with a newline and may list several matches; keep the first.
    tokens = reply.text.split()
    return tokens[0] if tokens else ''


def _function_call_args(completion):
    """Return the first choice's function-call arguments as a dict, or ``{}``.

    ``{}`` is returned when the model answered without a function call or when
    the arguments are not a JSON object.
    """
    try:
        call = completion.choices[0].message.function_call
    except (AttributeError, IndexError):
        return {}
    if call is None or not call.arguments:
        return {}
    try:
        parsed = json.loads(call.arguments)
    except (TypeError, ValueError):
        return {}
    return parsed if isinstance(parsed, dict) else {}


def _extract_with_openai(text, functions):
    """Ask gpt-3.5-turbo to extract structured fields from ``text``.

    Args:
        text: Message content; when empty or ``None`` no request is made.
        functions: Function schema list passed as ``functions``.

    Returns:
        The parsed function-call arguments, or ``{}`` when there are none.
    """
    if not text:
        return {}
    completion = client_openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{'role': 'user', 'content': text}],
        functions=functions,
        function_call='auto'
    )
    return _function_call_args(completion)


# NOTE(review): the indentation of this section was reconstructed; everything
# down to the chat handling is assumed to live inside the sidebar - confirm.
with st.sidebar:
    st.sidebar.title("Chat with the assistant π€")
    # Inputs for the video search and the reaction search
    search_query = st.sidebar.text_input("Enter your video search query")
    reaction_query = st.sidebar.text_input("Enter your reaction search query")
    name_reaction = st.checkbox("I am searching a name reaction")

    if reaction_query:
        prompt = reaction_query
        content = "please give complete step by step reaction along with the complete name of the molecules for the reaction, the requested reaction is : "
        response = groq_response(content, prompt)
        data = _extract_with_openai(response, molecule_custom_functions_reaction)
        reactant_1 = data.get('reactant_1') or ''
        reactant_2 = data.get('reactant_2') or ''
        reagent_3 = data.get('reagent_3') or ''
        product_4 = data.get('product_4') or ''
        product_5 = data.get('product_5') or ''

        # One request per component; '' when PubChem does not know the name.
        reactant_1_smiles = _pubchem_lookup(reactant_1, 'property/CanonicalSMILES')
        reactant_2_smiles = _pubchem_lookup(reactant_2, 'property/CanonicalSMILES')
        reagent_3_smiles = _pubchem_lookup(reagent_3, 'property/CanonicalSMILES')
        product_4_smiles = _pubchem_lookup(product_4, 'property/CanonicalSMILES')
        product_5_smiles = _pubchem_lookup(product_5, 'property/CanonicalSMILES')

        # Build the reaction SMILES "reactants>reagents>products" from the
        # components that were resolved; a missing side becomes an empty string.
        reactants = [reactant for reactant in [reactant_1_smiles, reactant_2_smiles] if reactant]
        reagents = [reagent for reagent in [reagent_3_smiles] if reagent]
        products = [product for product in [product_4_smiles, product_5_smiles] if product]
        reaction_components = ['.'.join(reactants), '.'.join(reagents), '.'.join(products)]
        reaction_smiles = '>'.join(reaction_components)

        if reactants or products:
            try:
                # Generate the reaction from SMILES
                rxn = AllChem.ReactionFromSmarts(reaction_smiles, useSmiles=True)
                # Draw the reaction
                d2d = Draw.MolDraw2DCairo(800, 300)  # Adjust size as needed
                d2d.DrawReaction(rxn)
                png = d2d.GetDrawingText()
                # Save the drawing to a file
                with open('reaction_image.png', 'wb+') as f:
                    f.write(png)
                image_variable = png
            except Exception as e:
                st.write(f"An error occurred: {e}")
        else:
            # Nothing was resolved, so there is no reaction to draw.
            st.write("Could not resolve any molecule of the requested reaction.")

    if search_query:
        prompt = search_query
        content = "please correct the spelling and write teh precise one search keyword for and only give teh keyword, only 1 and nothing else other that that : "
        response = groq_response(content, prompt)
        data = _extract_with_openai(response, keyword_custom_functions)
        keyword = data.get('keyword') or ''
        st.sidebar.write(keyword)

        # Perform the search.
        # NOTE(review): the search uses the raw query, not the corrected
        # `keyword` shown above - confirm whether that is intended.
        videosSearch = VideosSearch(search_query, limit=3)
        video_one = VideosSearch(search_query, limit=1)
        for video in video_one.result()['result']:
            video_one_id = video['id']

        for video in videosSearch.result()['result']:
            video_id = video['id']  # Extract video ID
            try:
                # Fetch the transcript and concatenate all of its text
                transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
                transcript_text = "\n".join([t['text'] for t in transcript_list])
                all_video_transcripts += f"\n---\nTranscript for Video ID {video_id}:\n{transcript_text}\n---\n"
            except Exception:
                # Best effort: record that this video has no usable transcript.
                error_message = "Transcript not available or error in fetching transcript."
                all_video_transcripts += f"\n---\nTranscript for Video ID {video_id}:\n{error_message}\n---\n"

        # all_video_transcripts now holds one section per video.
        video_id = ""
        if all_video_transcripts:
            prompt = all_video_transcripts
            content = "write a one sentence summary for the the given videos and always preserve and give me the vido_id always "
            video_compression = groq_response(content, prompt)
            compressed_transcripts = video_compression
            prompt = compressed_transcripts
            content = "give me the best video with maximum content and the best keywords from the transcript and always preserve and give me teh vido_id always "
            # Skip the second call when the first one failed and returned None.
            chat_completion = groq_response(content, prompt) if prompt else None
            video_id_fetch = chat_completion
            data = _extract_with_openai(video_id_fetch, video_custom_functions)
            video_id = data.get('video_id') or ''
            if video_id:
                st.video(f"https://www.youtube.com/watch?v={video_id}")

    messages = st.container(height=630)
    if image_variable:
        messages.chat_message("assistant").write(f"When you react {reactant_1} with {reactant_2} using {reagent_3}, you get {product_4} and {product_5}" + " here is the reaction in 2D bond representation:")
        messages.image(image_variable)
    if check_box:
        messages.chat_message("assistant").write("Here is the Chem Sketcher for you to draw the molecule:")
        with messages.chat_message("assistant"):
            components.iframe("https://marvinjs.chemicalize.com/v1/fcc0cc8570204c48a6447859c71cf611/editor.html?frameId=2cd5fd97-f496-4b6f-8cbc-417acc66684f&origin=https%3A%2F%2Fwww.rcsb.org", height=600)

    prompt_sidebar = st.chat_input("Say something")
    if prompt_sidebar:
        messages.chat_message("user").write(prompt_sidebar)
        prompt = prompt_sidebar
        sidebar_chat = groq_response("please answer thsi query : ", prompt)

        # Show a 3D viewer when the query names a molecule PubChem knows.
        data = _extract_with_openai(prompt_sidebar, molecule_custom_functions)
        molecule_name = data.get('molecule_name') or ''
        if molecule_name:
            cid = _pubchem_lookup(molecule_name, 'cids')
            if cid:
                with messages.chat_message("assistant"):
                    st.write(f"Here is the molecule {molecule_name} in 3D you can interact with it too π:")
                    components.iframe(f"https://embed.molview.org/v1/?mode=balls&cid={cid}")

        # The text answer is shown whether or not a molecule was found; it is
        # None when the Groq call failed (an error was already displayed).
        if sidebar_chat:
            messages.chat_message("assistant").write(sidebar_chat)
def _scenario_texts(completion):
    """Return the scenario function-call arguments as a dict, or ``{}``.

    ``{}`` is returned when the first choice carries no function call or when
    its arguments are not a JSON object.
    """
    try:
        call = completion.choices[0].message.function_call
    except (AttributeError, IndexError):
        return {}
    if call is None or not call.arguments:
        return {}
    try:
        parsed = json.loads(call.arguments)
    except (TypeError, ValueError):
        return {}
    return parsed if isinstance(parsed, dict) else {}


# NOTE(review): the indentation of this section was reconstructed from the
# statement order - confirm the nesting against the original file.
if st.session_state.prompt:
    prompt = st.session_state.prompt
    selected_options = " ".join(st.session_state.selected_options)
    # groq_response sends content + prompt, which is the same message as
    # before, and reports API errors instead of crashing the page.
    content = f"create a {selected_options} scenarios based task question for learning stereochemistry, create 4 scenarios each time and number them: "
    response = groq_response(content, prompt)
    if response:
        response_functions = client_openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{'role': 'user', 'content': response}],
            functions=scenario_custom_functions,
            function_call='auto'
        )
        data = _scenario_texts(response_functions)

        # Tabs for scenarios
        scenario_tabs = ['Scenario 1', 'Scenario 2', 'Scenario 3', 'Scenario 4']
        tabs = st.tabs(scenario_tabs)
        for i, tab in enumerate(tabs):
            with tab:
                st.header(scenario_tabs[i])
                scenario_text = data.get(f'scenario_{i+1}') or ''
                if not scenario_text:
                    # The extraction did not return this scenario.
                    st.write("This scenario could not be generated, please try again.")
                    continue
                st.write(scenario_text)

                prompt = scenario_text
                content = "subdivide this scenario into three subquestions and only give the questions. The scenario is: "
                chat_completion_subquestions = groq_response(content, prompt)
                scenario_generated = chat_completion_subquestions
                if scenario_generated:
                    st.write(scenario_generated)

                prompt = scenario_generated
                content = "give a sample ideal step-by-step format to attempt to answer this scenario question as a hint. Scenario: "
                # Skip the hint request when the sub-questions call failed.
                chat_completion_hint = groq_response(content, prompt) if prompt else None

                st.text_area("Enter your answer here", key=f'answer_{i}')
                with st.expander("See hint for answering the question" + str(i+1) + "π"):
                    if chat_completion_hint:
                        st.write(chat_completion_hint)

                # Upload PDF button
                uploaded_file = st.file_uploader("Upload your answer (PDF)", type="pdf", key=f"pdf_uploader_{i}")
                if uploaded_file is not None:
                    st.success("File uploaded successfully!")

                col1, col2 = st.columns(2)
                with col1:
                    with st.expander("See explanation 3D"):
                        components.iframe("https://embed.molview.org/v1/?mode=balls&cid=124527813")
                with col2:
                    with st.expander("See explanation 2D"):
                        components.iframe("https://marvinjs.chemicalize.com/v1/fcc0cc8570204c48a6447859c71cf611/editor.html?frameId=2cd5fd97-f496-4b6f-8cbc-417acc66684f&origin=https%3A%2F%2Fwww.rcsb.org")
# Example of error handling with client_groq API calls