Spaces:

smitathkr1
/

ai-learning-platform

Build error

File size: 21,395 Bytes

from groq import Groq
import groq
import streamlit as st
from openai import OpenAI
import json
import streamlit.components.v1 as components
import requests
from youtube_transcript_api import YouTubeTranscriptApi
from youtubesearchpython import VideosSearch
from rdkit import Chem
from rdkit.Chem import Draw, AllChem
import os
import queue
import re
import tempfile
import threading
import requests
from bs4 import BeautifulSoup
from embedchain import App
from embedchain.config import BaseLlmConfig
from embedchain.helpers.callbacks import (StreamingStdOutCallbackHandlerYield,
                                          generate)

    
# Shared API clients. Both keys are looked up in the process environment; a
# missing variable yields None, which is passed straight to the constructor.
client_groq = Groq(api_key=os.environ.get('GROQ_API'))
client_openai = OpenAI(api_key=os.environ.get('OPENAI_API'))

# Function-calling schema with a single string argument, ``link``.
link_custom_functions = [
    dict(
        name='extract_website_url',
        description='Get the website url',
        parameters=dict(
            type='object',
            properties=dict(
                link=dict(type='string', description='website url'),
            ),
        ),
    ),
]

def embedchain_bot(db_path, api_key):
    """Build an embedchain ``App`` from a fixed configuration.

    Args:
        db_path: Directory handed to the Chroma vector store as ``dir``.
        api_key: Key passed to both the OpenAI LLM and the OpenAI embedder.

    Returns:
        The object returned by ``App.from_config`` for this configuration.
    """
    llm_section = {
        "provider": "openai",
        "config": {
            "model": "gpt-3.5-turbo-1106",
            "temperature": 0.5,
            "max_tokens": 1000,
            "top_p": 1,
            "stream": True,
            "api_key": api_key,
        },
    }
    vectordb_section = {
        "provider": "chroma",
        "config": {"collection_name": "chat-pdf", "dir": db_path, "allow_reset": True},
    }
    embedder_section = {"provider": "openai", "config": {"api_key": api_key}}
    chunker_section = {"chunk_size": 2000, "chunk_overlap": 0, "length_function": "len"}

    full_config = {
        "llm": llm_section,
        "vectordb": vectordb_section,
        "embedder": embedder_section,
        "chunker": chunker_section,
    }
    return App.from_config(config=full_config)


def get_db_path():
    """Create a new temporary directory and return its path.

    The directory is created with ``tempfile.mkdtemp`` and is not removed
    by this function.
    """
    return tempfile.mkdtemp()


def get_ec_app(api_key):
    """Return the embedchain app stored in the Streamlit session, creating it once.

    Args:
        api_key: Key forwarded to ``embedchain_bot``; only used when no app
            is stored in ``st.session_state`` yet.

    Returns:
        The app held under ``st.session_state.app``.
    """
    if "app" not in st.session_state:
        print("Creating app")
        created_app = embedchain_bot(get_db_path(), api_key)
        st.session_state.app = created_app
        return created_app

    print("Found app in session state")
    return st.session_state.app

def groq_response(content, prompt):
    """Send ``content + prompt`` to Groq as one user message and return the reply.

    Args:
        content: Instruction text placed in front of the prompt.
        prompt: Text appended to ``content``.

    Returns:
        The message text of the first choice, or ``None`` when the request
        failed. On failure an error is shown in the Streamlit UI, so callers
        only need to check for ``None`` before using the result.
    """
    try:
        response = client_groq.chat.completions.create(
            messages=[{"role": "user", "content": content + prompt}],
            model="mixtral-8x7b-32768",
        )
    except groq.APIConnectionError:
        st.error("The server could not be reached, please try again later.")
        return None
    except groq.RateLimitError:
        st.error("You have exceeded the rate limit for the demo version, please try again in some time.")
        return None
    except groq.APIStatusError as err:
        # Listed after RateLimitError so the more specific message above wins;
        # covers every other non-success status instead of crashing the app.
        st.error(f"The model service returned an error (status {err.status_code}), please try again later.")
        return None
    return response.choices[0].message.content


# client_openai and client_groq are already created right after the imports
# with the same environment keys, so they are not rebuilt here. Only the
# additional Groq client is created.
client_groq_one = Groq(api_key=os.getenv('GROQ_API'))

# Function-calling schema with four string fields, scenario_1 .. scenario_4,
# one per numbered scenario.
scenario_custom_functions = [
    {
        'name': 'extract_scenario_info',
        'description': 'Get the individual scenarios text',
        'parameters': {
            'type': 'object',
            'properties': {
                f'scenario_{number}': {
                    'type': 'string',
                    'description': f'scenario number {number} full text',
                }
                for number in range(1, 5)
            },
        },
    }
]

# Function-calling schema with four string fields, keyword_1 .. keyword_4.
# The function name is kept as 'extract_scenario_info' so anything matching on
# it keeps working; only the description is corrected, since it previously
# described scenario texts rather than keywords.
scenario_keyword_functions = [
    {
        'name': 'extract_scenario_info',
        'description': 'Get the individual keywords',
        'parameters': {
            'type': 'object',
            'properties': {
                'keyword_1': {'type': 'string', 'description': 'keyword 1'},
                'keyword_2': {'type': 'string', 'description': 'keyword 2'},
                'keyword_3': {'type': 'string', 'description': 'keyword 3'},
                'keyword_4': {'type': 'string', 'description': 'keyword 4'},
            },
        },
    }
]

# Function-calling schema with a single string argument, ``video_id``.
video_custom_functions = [
    dict(
        name='extract_video_id',
        description='Get the video ID',
        parameters=dict(
            type='object',
            properties=dict(
                video_id=dict(type='string', description='video ID'),
            ),
        ),
    ),
]

# Accumulates the transcript text of every video; starts out empty.
all_video_transcripts = ''

# Function-calling schema with a single string argument, ``molecule_name``.
molecule_custom_functions = [
    dict(
        name='extract_molecule_info',
        description='Get the molecule name',
        parameters=dict(
            type='object',
            properties=dict(
                molecule_name=dict(type='string', description='name of the molecule'),
            ),
        ),
    ),
]

# Function-calling schema with a single string argument, ``keyword``.
keyword_custom_functions = [
    dict(
        name='extract_keyword_info',
        description='Get the search query keyword',
        parameters=dict(
            type='object',
            properties=dict(
                keyword=dict(type='string', description='keyword of teh search query'),
            ),
        ),
    ),
]

# Placeholder SMILES strings, one per reaction component. They are stand-ins
# until real values are retrieved; the second reactant may end up as an empty
# string when a reaction has no such component.
(
    reactant_1_smiles,
    reactant_2_smiles,
    reagent_3_smiles,
    product_4_smiles,
    product_5_smiles,
) = [
    f'your_{slot}_smiles_here'
    for slot in ('reactant_1', 'reactant_2', 'reagent_3', 'product_4', 'product_5')
]
# Function-calling schema with one string field per reaction component:
# two reactants, one reagent and two products.
molecule_custom_functions_reaction = [
    {
        'name': 'extract_molecules_info',
        'description': 'Get the name of the individual molecules',
        'parameters': {
            'type': 'object',
            'properties': {
                field: {'type': 'string', 'description': label}
                for field, label in (
                    ('reactant_1', 'reactant number 1 '),
                    ('reactant_2', 'reactant number 2 '),
                    ('reagent_3', 'reagent number 1 '),
                    ('product_4', 'product number 1'),
                    ('product_5', 'product number 2'),
                )
            },
        },
    }
]

# Streamlit UI
st.title("Stereo World Updated 🌍")

# Stays None until a reaction image has been rendered.
image_variable = None

# Seed each session-state entry only when it is not present yet.
for _state_key, _initial_value in (
    ('prompt', ''),
    ('selected_options', []),
    ('selected_options_reaction', []),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value


# User inputs: every widget value is stored as soon as it is read.
st.session_state.selected_options = st.multiselect(
    "Select options",
    ["fun based", "context based", "real world based", "conceptual textbook based"],
)
st.session_state.prompt = st.text_input("Enter your prompt")
check_box = st.checkbox("Open Chem Sketcher")
def _function_call_arguments(completion):
    """Decode the JSON arguments of a chat completion's function call.

    With ``function_call='auto'`` the model may answer in plain text, in which
    case there is no function call to read.

    Args:
        completion: Object returned by ``client_openai.chat.completions.create``.

    Returns:
        The decoded arguments as a dict, or ``{}`` when there is no function
        call, no arguments, or the arguments are not a JSON object.
    """
    try:
        call = completion.choices[0].message.function_call
    except (AttributeError, IndexError):
        return {}
    if call is None or not call.arguments:
        return {}
    try:
        parsed = json.loads(call.arguments)
    except (TypeError, ValueError):
        return {}
    return parsed if isinstance(parsed, dict) else {}


def _pubchem_first_line(name, query):
    """Look up a compound by name on PubChem and return the first result line.

    Args:
        name: Compound name; an empty or missing name is not looked up.
        query: Path suffix after the name, e.g. ``'property/CanonicalSMILES/TXT'``
            or ``'cids/TXT'``.

    Returns:
        The first whitespace-separated token of the response body, or ``''``
        when the name is empty, the request fails, or the status is not 200.
    """
    from urllib.parse import quote  # local import: only this helper needs it

    if not name:
        return ''
    # Names may contain spaces, slashes or brackets, so encode every character.
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{quote(str(name), safe='')}/{query}"
    try:
        reply = requests.get(url, timeout=15)
    except requests.RequestException:
        return ''
    if reply.status_code != 200:
        return ''
    # The body ends with a newline and may list several matches; keep the first.
    tokens = reply.text.split()
    return tokens[0] if tokens else ''


# Sidebar: reaction drawing, video search/summary, and the assistant chat.
with st.sidebar:
    st.sidebar.title("Chat with the assistant 🤖")
    # Input for search query
    search_query = st.sidebar.text_input("Enter your video search query")
    reaction_query = st.sidebar.text_input("Enter your reaction search query")
    name_reaction = st.checkbox("I am searching a name reaction")

    if reaction_query:
        prompt = reaction_query
        content = "please give complete step by step reaction along with the complete name of the molecules for the reaction, the requested reaction is : "
        response = groq_response(content, prompt)

        # groq_response yields None after reporting a failed request; in that
        # case no molecule names can be extracted.
        data = {}
        if response:
            response_functions = client_openai.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[{'role': 'user', 'content': response}],
                functions=molecule_custom_functions_reaction,
                function_call='auto'
            )
            data = _function_call_arguments(response_functions)

        reactant_1 = data.get('reactant_1', '')
        reactant_2 = data.get('reactant_2', '')
        reagent_3 = data.get('reagent_3', '')
        product_4 = data.get('product_4', '')
        product_5 = data.get('product_5', '')

        # One request per component; failed lookups give an empty string.
        smiles_query = 'property/CanonicalSMILES/TXT'
        reactant_1_smiles = _pubchem_first_line(reactant_1, smiles_query)
        reactant_2_smiles = _pubchem_first_line(reactant_2, smiles_query)
        reagent_3_smiles = _pubchem_first_line(reagent_3, smiles_query)
        product_4_smiles = _pubchem_first_line(product_4, smiles_query)
        product_5_smiles = _pubchem_first_line(product_5, smiles_query)

        # Reaction SMILES layout: reactants>reagents>products, with the
        # molecules of each group joined by '.'.
        reactants = [s for s in (reactant_1_smiles, reactant_2_smiles) if s]
        reagents = [s for s in (reagent_3_smiles,) if s]
        products = [s for s in (product_4_smiles, product_5_smiles) if s]

        if reactants or products:
            reaction_smiles = '>'.join('.'.join(group) for group in (reactants, reagents, products))
            try:
                # Generate the reaction from SMILES
                rxn = AllChem.ReactionFromSmarts(reaction_smiles, useSmiles=True)

                # Draw the reaction
                d2d = Draw.MolDraw2DCairo(800, 300)  # Adjust size as needed
                d2d.DrawReaction(rxn)
                png = d2d.GetDrawingText()

                # Save the drawing to a file
                with open('reaction_image.png', 'wb+') as f:
                    f.write(png)
                image_variable = png
            except Exception as e:
                # UI boundary: report any parsing/drawing failure instead of crashing.
                st.write(f"An error occurred: {e}")
        elif response:
            st.warning("The molecules of this reaction could not be resolved.")

    if search_query:
        prompt = search_query
        content = "please correct the spelling and write teh precise one search keyword for and only give teh keyword, only 1 and nothing else other that that : "
        response = groq_response(content, prompt)
        if response:
            response_functions = client_openai.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[{'role': 'user', 'content': response}],
                functions=keyword_custom_functions,
                function_call='auto'
            )
            keyword = _function_call_arguments(response_functions).get('keyword', '')
            if keyword:
                st.sidebar.write(keyword)

        # Perform the search
        videosSearch = VideosSearch(search_query, limit=3)
        transcript_sections = []
        for video in videosSearch.result()['result']:
            video_id = video['id']  # Extract video ID
            try:
                # Fetch the transcript for the video ID
                transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
                transcript_text = "\n".join(t['text'] for t in transcript_list)
                transcript_sections.append(
                    f"\n---\nTranscript for Video ID {video_id}:\n{transcript_text}\n---\n"
                )
            except Exception:
                # Best effort: a missing transcript is recorded, not fatal.
                error_message = "Transcript not available or error in fetching transcript."
                transcript_sections.append(
                    f"\n---\nTranscript for Video ID {video_id}:\n{error_message}\n---\n"
                )
        all_video_transcripts += "".join(transcript_sections)

    # all_video_transcripts now holds the transcripts of all videos as one
    # string; summarise it and let the model pick one video to embed.
    video_id = ""
    if all_video_transcripts:
        prompt = all_video_transcripts
        content = "write a one sentence summary for the the given videos and always preserve and give me the vido_id always "
        compressed_transcripts = groq_response(content, prompt)
        # Each step needs the text of the previous one; stop at the first failure.
        if compressed_transcripts:
            prompt = compressed_transcripts
            content = "give me the best video with maximum content and the best keywords from the transcript and always preserve and give me teh vido_id always "
            video_id_fetch = groq_response(content, prompt)
            if video_id_fetch:
                response_functions = client_openai.chat.completions.create(
                    model="gpt-3.5-turbo",
                    messages=[{'role': 'user', 'content': video_id_fetch}],
                    functions=video_custom_functions,
                    function_call='auto'
                )
                video_id = _function_call_arguments(response_functions).get('video_id', '')
        if video_id:
            st.video(f"https://www.youtube.com/watch?v={video_id}")

    messages = st.container(height=630)
    if image_variable:
        messages.chat_message("assistant").write(f"When you react {reactant_1} with {reactant_2} using {reagent_3}, you get {product_4} and {product_5}" + " here is the reaction in 2D bond representation:")
        messages.image(image_variable)
    if check_box:
        messages.chat_message("assistant").write("Here is the Chem Sketcher for you to draw the molecule:")
        with messages.chat_message("assistant"):
            components.iframe("https://marvinjs.chemicalize.com/v1/fcc0cc8570204c48a6447859c71cf611/editor.html?frameId=2cd5fd97-f496-4b6f-8cbc-417acc66684f&origin=https%3A%2F%2Fwww.rcsb.org", height=600)
    prompt_sidebar = st.chat_input("Say something")
    if prompt_sidebar:
        messages.chat_message("user").write(prompt_sidebar)
        prompt = prompt_sidebar
        sidebar_chat = groq_response("please answer thsi query : ", prompt)
        response_functions = client_openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{'role': 'user', 'content': prompt_sidebar}],
            functions=molecule_custom_functions,
            function_call='auto'
        )
        data = _function_call_arguments(response_functions)
        molecule_name = data.get('molecule_name', '')
        cid = _pubchem_first_line(molecule_name, 'cids/TXT')
        # Show the 3D viewer only when the message named a molecule PubChem knows.
        if cid:
            with messages.chat_message("assistant"):
                st.write(f"Here is the molecule {molecule_name} in 3D you can interact with it too 😉:")
                components.iframe(f"https://embed.molview.org/v1/?mode=balls&cid={cid}")
        # The text answer is shown whether or not a molecule was detected.
        if sidebar_chat:
            messages.chat_message("assistant").write(sidebar_chat)
            
# Main area: generate four scenario questions for the entered prompt and
# render one tab per scenario with sub-questions, a hint and answer inputs.
if st.session_state.prompt:
    prompt = st.session_state.prompt
    selected_options = " ".join(st.session_state.selected_options)
    content = f"create a {selected_options} scenarios based task question for learning stereochemistry, create 4 scenarios each time and number them: "
    # groq_response reports connection/rate-limit failures in the UI and
    # yields None, so a failed request no longer crashes the page.
    response = groq_response(content, prompt)

    data = {}
    if response:
        response_functions = client_openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{'role': 'user', 'content': response}],
            functions=scenario_custom_functions,
            function_call='auto'
        )
        # With function_call='auto' the model may reply in plain text, which
        # leaves function_call unset.
        function_call = response_functions.choices[0].message.function_call
        if function_call is not None and function_call.arguments:
            try:
                parsed = json.loads(function_call.arguments)
            except ValueError:
                parsed = {}
            if isinstance(parsed, dict):
                data = parsed
        if not data:
            st.warning("The scenarios could not be extracted, please try again.")

    if data:
        # Tabs for scenarios
        scenario_tabs = ['Scenario 1', 'Scenario 2', 'Scenario 3', 'Scenario 4']
        tabs = st.tabs(scenario_tabs)
        for i, tab in enumerate(tabs):
            with tab:
                st.header(scenario_tabs[i])
                scenario_text = data.get(f'scenario_{i+1}', '')
                if not isinstance(scenario_text, str) or not scenario_text:
                    st.write("No text was returned for this scenario.")
                    continue
                st.write(scenario_text)

                content = "subdivide this scenario into three subquestions and only give the questions. The scenario is: "
                scenario_generated = groq_response(content, scenario_text)

                # The hint is built from the sub-questions, so it is only
                # requested when those were generated.
                chat_completion_hint = None
                if scenario_generated:
                    st.write(scenario_generated)
                    content = "give a sample ideal step-by-step format to attempt to answer this scenario question as a hint. Scenario: "
                    chat_completion_hint = groq_response(content, scenario_generated)

                st.text_area("Enter your answer here", key=f'answer_{i}')

                if chat_completion_hint:
                    with st.expander("See hint for answering the question" + str(i+1) + "😀"):
                        st.write(chat_completion_hint)

                # Upload PDF button
                uploaded_file = st.file_uploader("Upload your answer (PDF)", type="pdf", key=f"pdf_uploader_{i}")
                if uploaded_file is not None:
                    st.success("File uploaded successfully!")

                col1, col2 = st.columns(2)
                with col1:
                    with st.expander("See explanation 3D"):
                        components.iframe("https://embed.molview.org/v1/?mode=balls&cid=124527813")
                with col2:
                    with st.expander("See explanation 2D"):
                        components.iframe("https://marvinjs.chemicalize.com/v1/fcc0cc8570204c48a6447859c71cf611/editor.html?frameId=2cd5fd97-f496-4b6f-8cbc-417acc66684f&origin=https%3A%2F%2Fwww.rcsb.org")

# Example of error handling with client_groq API calls