File size: 21,395 Bytes
2d883ce
1c23f96
2d883ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad0d142
fba478d
 
2d883ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c23f96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d883ce
fba478d
 
 
2d883ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c23f96
2d883ce
 
 
 
 
 
 
 
 
 
 
 
1c23f96
2d883ce
 
 
 
 
 
 
 
1c23f96
 
 
2d883ce
 
1c23f96
2d883ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c23f96
 
 
2d883ce
 
1c23f96
2d883ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c23f96
 
 
 
 
 
 
2d883ce
1c23f96
2d883ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c23f96
 
2d883ce
 
 
 
 
 
eb0bd77
 
 
 
1c23f96
 
 
 
 
 
 
 
eb0bd77
1c23f96
 
 
 
 
 
 
 
 
eb0bd77
 
 
1c23f96
eb0bd77
 
2d883ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c23f96
 
 
 
2d883ce
1c23f96
 
 
2d883ce
 
 
1c23f96
2d883ce
 
 
 
 
 
 
 
 
 
 
 
 
1c23f96
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
from groq import Groq
import groq
import streamlit as st
from openai import OpenAI
import json
import streamlit.components.v1 as components
import requests
from youtube_transcript_api import YouTubeTranscriptApi
from youtubesearchpython import VideosSearch
from rdkit import Chem
from rdkit.Chem import Draw, AllChem
import os
import queue
import re
import tempfile
import threading
import requests
from bs4 import BeautifulSoup
from embedchain import App
from embedchain.config import BaseLlmConfig
from embedchain.helpers.callbacks import (StreamingStdOutCallbackHandlerYield,
                                          generate)

    
# API clients used throughout the script. The keys are read from the
# GROQ_API and OPENAI_API environment variables; os.getenv returns None when
# a variable is unset.
client_groq = Groq(api_key=os.getenv('GROQ_API'))
client_openai = OpenAI(api_key=os.getenv('OPENAI_API'))

# OpenAI function-calling schema describing a single string argument, `link`,
# that carries a website url.
link_custom_functions = [
    dict(
        name='extract_website_url',
        description='Get the website url',
        parameters=dict(
            type='object',
            properties=dict(
                link=dict(type='string', description='website url'),
            ),
        ),
    ),
]

def embedchain_bot(db_path, api_key):
    """Build an embedchain ``App`` from an inline configuration.

    Args:
        db_path: Directory handed to the chroma vector store as its ``dir``.
        api_key: Key passed to both the OpenAI LLM and the OpenAI embedder
            sections of the configuration.

    Returns:
        Whatever ``App.from_config`` returns for this configuration.
    """
    llm_section = {
        "provider": "openai",
        "config": {
            "model": "gpt-3.5-turbo-1106",
            "temperature": 0.5,
            "max_tokens": 1000,
            "top_p": 1,
            "stream": True,
            "api_key": api_key,
        },
    }
    vectordb_section = {
        "provider": "chroma",
        "config": {
            "collection_name": "chat-pdf",
            "dir": db_path,
            "allow_reset": True,
        },
    }
    embedder_section = {"provider": "openai", "config": {"api_key": api_key}}
    chunker_section = {
        "chunk_size": 2000,
        "chunk_overlap": 0,
        "length_function": "len",
    }

    settings = {
        "llm": llm_section,
        "vectordb": vectordb_section,
        "embedder": embedder_section,
        "chunker": chunker_section,
    }
    return App.from_config(config=settings)


def get_db_path():
    """Create a fresh temporary directory and return its path.

    The directory is made with ``tempfile.mkdtemp`` and is not removed by
    this function.
    """
    return tempfile.mkdtemp()


def get_ec_app(api_key):
    """Return the embedchain app stored in the Streamlit session, creating it once.

    Args:
        api_key: Key forwarded to ``embedchain_bot`` when a new app is built.

    Returns:
        The app held in ``st.session_state.app``; when none exists yet, a new
        one is built on a fresh temporary directory and stored there first.
    """
    # Reuse the app already attached to this session.
    if "app" in st.session_state:
        print("Found app in session state")
        return st.session_state.app

    print("Creating app")
    created = embedchain_bot(get_db_path(), api_key)
    st.session_state.app = created
    return created

def groq_response(content, prompt, model="mixtral-8x7b-32768"):
    """Send ``content + prompt`` to Groq as one user message and return the reply.

    Args:
        content: Instruction text placed before the prompt.
        prompt: Text appended directly after ``content`` (no separator added).
        model: Groq model id used for the completion. Defaults to the id the
            script has always used.
            NOTE(review): confirm this model id is still served by Groq.

    Returns:
        The text of the first choice, or ``None`` when the request failed.
        On failure a message is shown with ``st.error``; callers must handle
        the ``None`` case before using the result.
    """
    try:
        response = client_groq.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": content + prompt,
                }
            ],
            model=model,
        )
    except groq.APIConnectionError:
        st.error("The server could not be reached, please try again later.")
    except groq.RateLimitError:
        st.error("You have exceeded the rate limit for the demo version, please try again in some time.")
    except groq.APIStatusError as e:
        # Any other non-2xx reply (bad key, unknown model, server error).
        # Must come after RateLimitError, which is a more specific status error.
        st.error(f"The language model request failed (status {e.status_code}), please try again later.")
    else:
        return response.choices[0].message.content
    return None


# Initialize your clients with API keys
# NOTE(review): client_openai and client_groq are already constructed near the
# top of this file with the same environment variables; these lines rebind
# them to new instances. client_groq_one is not referenced anywhere in the
# visible part of the file — confirm whether it is still needed.
client_openai = OpenAI(api_key=os.getenv('OPENAI_API'))
client_groq = Groq(api_key=os.getenv('GROQ_API'))
client_groq_one = Groq(api_key=os.getenv('GROQ_API'))

# OpenAI function-calling schema that splits a generated answer into four
# string fields, scenario_1 .. scenario_4.
scenario_custom_functions = [
    {
        'name': 'extract_scenario_info',
        'description': 'Get the individual scenarios text',
        'parameters': {
            'type': 'object',
            'properties': {
                f'scenario_{number}': {
                    'type': 'string',
                    'description': f'scenario number {number} full text',
                }
                for number in range(1, 5)
            },
        },
    },
]

# OpenAI function-calling schema with four string fields, keyword_1 ..
# keyword_4. The function name and description are the same strings used by
# the scenario schema.
scenario_keyword_functions = [
    {
        'name': 'extract_scenario_info',
        'description': 'Get the individual scenarios text',
        'parameters': {
            'type': 'object',
            'properties': {
                f'keyword_{number}': {
                    'type': 'string',
                    'description': f'keyword {number}',
                }
                for number in range(1, 5)
            },
        },
    },
]

# OpenAI function-calling schema with a single string argument, `video_id`.
video_custom_functions = [
    dict(
        name='extract_video_id',
        description='Get the video ID',
        parameters=dict(
            type='object',
            properties=dict(
                video_id=dict(type='string', description='video ID'),
            ),
        ),
    ),
]
# Accumulator for the transcripts of all found videos. It starts empty and is
# appended to by the video-search code in the sidebar; the sidebar only
# summarises videos when this string is non-empty.
all_video_transcripts = ""

# OpenAI function-calling schema with a single string argument,
# `molecule_name`.
molecule_custom_functions = [
    dict(
        name='extract_molecule_info',
        description='Get the molecule name',
        parameters=dict(
            type='object',
            properties=dict(
                molecule_name=dict(type='string', description='name of the molecule'),
            ),
        ),
    ),
]

# OpenAI function-calling schema with a single string argument, `keyword`.
keyword_custom_functions = [
    dict(
        name='extract_keyword_info',
        description='Get the search query keyword',
        parameters=dict(
            type='object',
            properties=dict(
                keyword=dict(type='string', description='keyword of teh search query'),
            ),
        ),
    ),
]

# Placeholder SMILES strings for each reaction component. They are not valid
# SMILES; the sidebar reaction search overwrites all five with values fetched
# from PubChem when a reaction query is entered.
reactant_1_smiles = 'your_reactant_1_smiles_here'
reactant_2_smiles = 'your_reactant_2_smiles_here'  # may end up as an empty string when the component is absent
reagent_3_smiles = 'your_reagent_3_smiles_here'
product_4_smiles = 'your_product_4_smiles_here'
product_5_smiles = 'your_product_5_smiles_here'
# OpenAI function-calling schema naming the components of a reaction: two
# reactants, one reagent and two products, each a string field.
molecule_custom_functions_reaction = [
    {
        'name': 'extract_molecules_info',
        'description': 'Get the name of the individual molecules',
        'parameters': {
            'type': 'object',
            'properties': {
                field: {'type': 'string', 'description': text}
                for field, text in (
                    ('reactant_1', 'reactant number 1 '),
                    ('reactant_2', 'reactant number 2 '),
                    ('reagent_3', 'reagent number 1 '),
                    ('product_4', 'product number 1'),
                    ('product_5', 'product number 2'),
                )
            },
        },
    },
]

# Page header.
st.title("Stereo World Updated 🌍")

# Holds the rendered reaction image once the sidebar reaction search has
# produced one; None means there is nothing to show.
image_variable = None

# Give every session-state slot a starting value the first time the script runs.
for _state_key, _state_default in (
    ('prompt', ''),
    ('selected_options', []),
    ('selected_options_reaction', []),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# Main-page inputs; their current values are mirrored into session state.
_scenario_styles = ["fun based", "context based", "real world based", "conceptual textbook based"]
st.session_state.selected_options = st.multiselect("Select options", _scenario_styles)
st.session_state.prompt = st.text_input("Enter your prompt")
check_box = st.checkbox("Open Chem Sketcher")
def _pubchem_lookup(name, tail, timeout=10):
    """Return the first token of a PubChem PUG REST name lookup, or ''.

    Args:
        name: Compound name to look up; an empty name returns '' without a
            request.
        tail: Path placed after the name, e.g. "property/CanonicalSMILES/TXT"
            or "cids/TXT".
        timeout: Seconds to wait for PubChem before giving up.

    Returns:
        The first whitespace-delimited token of the response body, so the
        trailing newline and any additional matches are dropped. Returns ''
        on a network error, a non-200 status or an empty body.
    """
    from urllib.parse import quote

    if not name:
        return ''
    # Encode the name so characters such as '/', '#' or '?' cannot alter the URL.
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{quote(name, safe='')}/{tail}"
    try:
        reply = requests.get(url, timeout=timeout)
    except requests.RequestException:
        return ''
    if reply.status_code != 200:
        return ''
    tokens = reply.text.split()
    return tokens[0] if tokens else ''


def _function_call_arguments(completion):
    """Return the function-call arguments of a chat completion as a dict.

    With ``function_call='auto'`` the model may answer in plain text, in which
    case ``message.function_call`` is None. That case, missing choices and
    arguments that are not a JSON object all yield an empty dict.
    """
    try:
        call = completion.choices[0].message.function_call
    except (AttributeError, IndexError):
        return {}
    if call is None or not call.arguments:
        return {}
    try:
        parsed = json.loads(call.arguments)
    except (TypeError, ValueError):
        return {}
    return parsed if isinstance(parsed, dict) else {}


with st.sidebar:
    st.sidebar.title("Chat with the assistant 🤖")
    # Input for search query
    search_query = st.sidebar.text_input("Enter your video search query")
    reaction_query = st.sidebar.text_input("Enter your reaction search query")
    name_reaction = st.checkbox("I am searching a name reaction")

    # --- Reaction search: describe the reaction, name its molecules, draw it ---
    if reaction_query:
        prompt = reaction_query
        content = "please give complete step by step reaction along with the complete name of the molecules for the reaction, the requested reaction is : "
        response = groq_response(content, prompt)
        # groq_response returns None after reporting an error; nothing to draw then.
        if response:
            response_functions = client_openai.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[{'role': 'user', 'content': response}],
                functions=molecule_custom_functions_reaction,
                function_call='auto',
            )
            data = _function_call_arguments(response_functions)
            reactant_1 = data.get('reactant_1', '')
            reactant_2 = data.get('reactant_2', '')
            reagent_3 = data.get('reagent_3', '')
            product_4 = data.get('product_4', '')
            product_5 = data.get('product_5', '')

            # One request per molecule; a failed lookup yields ''.
            smiles_tail = "property/CanonicalSMILES/TXT"
            reactant_1_smiles = _pubchem_lookup(reactant_1, smiles_tail)
            reactant_2_smiles = _pubchem_lookup(reactant_2, smiles_tail)
            reagent_3_smiles = _pubchem_lookup(reagent_3, smiles_tail)
            product_4_smiles = _pubchem_lookup(product_4, smiles_tail)
            product_5_smiles = _pubchem_lookup(product_5, smiles_tail)

            # Reaction SMILES has the form reactants>reagents>products, with
            # '.' between molecules of one group; empty entries are left out.
            reaction_smiles = '>'.join(
                '.'.join(smiles for smiles in group if smiles)
                for group in (
                    (reactant_1_smiles, reactant_2_smiles),
                    (reagent_3_smiles,),
                    (product_4_smiles, product_5_smiles),
                )
            )
            try:
                # Generate the reaction from SMILES
                rxn = AllChem.ReactionFromSmarts(reaction_smiles, useSmiles=True)

                # Draw the reaction
                d2d = Draw.MolDraw2DCairo(800, 300)  # Adjust size as needed
                d2d.DrawReaction(rxn)
                # Close the drawing before reading the image bytes.
                d2d.FinishDrawing()
                png = d2d.GetDrawingText()

                # Save the drawing to a file
                with open('reaction_image.png', 'wb+') as f:
                    f.write(png)
                image_variable = png
            except Exception as e:
                # Kept broad on purpose: any parsing or drawing failure is
                # reported in the UI instead of aborting the page.
                st.write(f"An error occurred: {e}")

    # --- Video search: collect transcripts of the top results ---
    if search_query:
        prompt = search_query
        content = "please correct the spelling and write teh precise one search keyword for and only give teh keyword, only 1 and nothing else other that that : "
        response = groq_response(content, prompt)
        if response:
            response_functions = client_openai.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[{'role': 'user', 'content': response}],
                functions=keyword_custom_functions,
                function_call='auto',
            )
            keyword = _function_call_arguments(response_functions).get('keyword', '')
            if keyword:
                st.sidebar.write(keyword)

        # Perform the search
        # NOTE(review): the search runs on the raw query, not on the corrected
        # keyword shown above — confirm that this is intended.
        videosSearch = VideosSearch(search_query, limit=3)

        transcript_sections = []
        for video in videosSearch.result()['result']:
            video_id = video['id']  # Extract video ID
            try:
                # Fetch the transcript for the video ID
                transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
                transcript_text = "\n".join([t['text'] for t in transcript_list])
            except Exception:
                # Best effort: a video without a usable transcript is recorded
                # with a placeholder so its id still reaches the summary step.
                transcript_text = "Transcript not available or error in fetching transcript."
            transcript_sections.append(
                f"\n---\nTranscript for Video ID {video_id}:\n{transcript_text}\n---\n"
            )
        all_video_transcripts += "".join(transcript_sections)

    # --- Pick the best video from the collected transcripts and embed it ---
    video_id = ""
    if all_video_transcripts:
        prompt = all_video_transcripts
        content = "write a one sentence summary for the the given videos and always preserve and give me the vido_id always "
        compressed_transcripts = groq_response(content, prompt)
        video_id_fetch = None
        if compressed_transcripts:
            prompt = compressed_transcripts
            content = "give me the best video with maximum content and the best keywords from the transcript and always preserve and give me teh vido_id always "
            video_id_fetch = groq_response(content, prompt)
        if video_id_fetch:
            response_functions = client_openai.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[{'role': 'user', 'content': video_id_fetch}],
                functions=video_custom_functions,
                function_call='auto',
            )
            video_id = _function_call_arguments(response_functions).get('video_id', '')
            if video_id:
                st.video(f"https://www.youtube.com/watch?v={video_id}")

    # --- Chat area ---
    messages = st.container(height=630)
    if image_variable:
        messages.chat_message("assistant").write(f"When you react {reactant_1} with {reactant_2} using {reagent_3}, you get {product_4} and {product_5}" + " here is the reaction in 2D bond representation:")
        messages.image(image_variable)
    if check_box:
        messages.chat_message("assistant").write("Here is the Chem Sketcher for you to draw the molecule:")
        with messages.chat_message("assistant"):
            components.iframe("https://marvinjs.chemicalize.com/v1/fcc0cc8570204c48a6447859c71cf611/editor.html?frameId=2cd5fd97-f496-4b6f-8cbc-417acc66684f&origin=https%3A%2F%2Fwww.rcsb.org", height=600)
    prompt_sidebar = st.chat_input("Say something")
    if prompt_sidebar:
        messages.chat_message("user").write(prompt_sidebar)
        prompt = prompt_sidebar
        sidebar_chat = groq_response("please answer thsi query : ", prompt)
        response_functions = client_openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{'role': 'user', 'content': prompt_sidebar}],
            functions=molecule_custom_functions,
            function_call='auto',
        )
        # When the message names a molecule, show it in the 3D viewer first.
        molecule_name = _function_call_arguments(response_functions).get('molecule_name', '')
        cid = _pubchem_lookup(molecule_name, "cids/TXT")
        if cid:
            with messages.chat_message("assistant"):
                st.write(f"Here is the molecule {molecule_name} in 3D you can interact with it too 😉:")
                components.iframe(f"https://embed.molview.org/v1/?mode=balls&cid={cid}")
        # The text answer is shown whether or not a molecule was found.
        if sidebar_chat:
            messages.chat_message("assistant").write(sidebar_chat)
            
# Main page: generate four scenario questions for the entered prompt and
# render each in its own tab with sub-questions, a hint, an answer box, a PDF
# upload and two embedded viewers.
# NOTE(review): nothing here is cached, so each run of this script repeats
# every model call below and may produce different scenarios — consider
# storing the results in st.session_state keyed by prompt and options.
if st.session_state.prompt:
    prompt = st.session_state.prompt
    selected_options = " ".join(st.session_state.selected_options)
    # Routed through groq_response so connection and rate-limit failures are
    # reported like everywhere else; the message sent is content + prompt.
    content = f"create a {selected_options} scenarios based task question for learning stereochemistry, create 4 scenarios each time and number them: "
    response = groq_response(content, prompt)

    if response:
        response_functions = client_openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{'role': 'user', 'content': response}],
            functions=scenario_custom_functions,
            function_call='auto',
        )
        # With function_call='auto' the model may reply without a function
        # call, leaving function_call as None.
        function_call = response_functions.choices[0].message.function_call
        try:
            data = json.loads(function_call.arguments) if function_call is not None else {}
        except (TypeError, ValueError):
            data = {}
        if not isinstance(data, dict):
            data = {}

        # Tabs for scenarios
        scenario_tabs = ['Scenario 1', 'Scenario 2', 'Scenario 3', 'Scenario 4']
        for i, (tab, title) in enumerate(zip(st.tabs(scenario_tabs), scenario_tabs)):
            with tab:
                st.header(title)
                scenario_text = data.get(f'scenario_{i+1}', '')
                if not scenario_text:
                    # The model left this field out; skip the tab instead of failing.
                    st.warning("This scenario could not be generated, please try again.")
                    continue
                st.write(scenario_text)

                content = "subdivide this scenario into three subquestions and only give the questions. The scenario is: "
                scenario_generated = groq_response(content, scenario_text)

                # The hint is derived from the sub-questions, so it is only
                # requested when those were produced.
                chat_completion_hint = None
                if scenario_generated:
                    st.write(scenario_generated)
                    content = "give a sample ideal step-by-step format to attempt to answer this scenario question as a hint. Scenario: "
                    chat_completion_hint = groq_response(content, scenario_generated)

                st.text_area("Enter your answer here", key=f'answer_{i}')

                with st.expander("See hint for answering the question" + str(i+1) + "😀"):
                    if chat_completion_hint:
                        st.write(chat_completion_hint)

                # Upload PDF button
                uploaded_file = st.file_uploader("Upload your answer (PDF)", type="pdf", key=f"pdf_uploader_{i}")
                if uploaded_file is not None:
                    st.success("File uploaded successfully!")

                col1, col2 = st.columns(2)
                with col1:
                    with st.expander("See explanation 3D"):
                        components.iframe("https://embed.molview.org/v1/?mode=balls&cid=124527813")
                with col2:
                    with st.expander("See explanation 2D"):
                        components.iframe("https://marvinjs.chemicalize.com/v1/fcc0cc8570204c48a6447859c71cf611/editor.html?frameId=2cd5fd97-f496-4b6f-8cbc-417acc66684f&origin=https%3A%2F%2Fwww.rcsb.org")

# Example of error handling with client_groq API calls