Spaces: Aasher / Super_AI_Assistant

Configuration error

App Files Community

Aasher committed on Aug 10, 2024

Commit

db63cc0

2 Parent(s): 4fff95e c522c36

Merge branch 'main' of https://github.com/aasherkamal216/Super-GPT

Browse files

Files changed (4) hide show

main.py +0 -312
test2.py +0 -314
test4.py +29 -15
tts.py +0 -36

main.py DELETED Viewed

@@ -1,312 +0,0 @@
-import streamlit as st
-from audio_recorder_streamlit import audio_recorder
-from streamlit_vertical_slider import vertical_slider
-from streamlit_lottie import st_lottie
-import json
-from PIL import Image
-from io import BytesIO
-import base64
-from utils import visualize_display_page
-import google.generativeai as genai
-from langchain_groq import ChatGroq
-import os , random
-from dotenv import load_dotenv
-load_dotenv()
-st.set_page_config(
-    page_title="Super GPT",
-    page_icon="👽",
-    layout="wide",
-    initial_sidebar_state="auto",
-)
-st.title("Super GPT Assistant")
-google_models = [
-    "gemini-1.5-flash",
-    "gemini-1.5-pro",
-]
-groq_models = [
-    "llama-3.1-8b-instant",
-    "llama-3.1-70b-versatile",
-    "llama3-70b-8192",
-    "llama3-8b-8192",
-    "gemma2-9b-it",
-    "mixtral-8x7b-32768"
-]
-@st.cache_data
-def load_lottie_file(filepath: str):
-    with open(filepath, "r") as f:
-        return json.load(f)
-def get_llm_info(available_models):
-    with st.sidebar:
-        tip =tip = "Select Gemini models if you require multi-modal capabilities (text, image, audio and video inputs)"
-        model = st.selectbox("Choose LLM:", available_models, help=tip)
-        model_type = None
-        if model.startswith(("llama", "gemma", "mixtral")): model_type = "groq"
-        elif model.startswith("gemini"): model_type = "google"
-        with st.popover("⚙️Model Parameters", use_container_width=True):
-            temp = st.slider("Temperature:", min_value=0.0,
-                                            max_value=2.0, value=0.5, step=0.5)
-            max_tokens = st.slider("Maximum Tokens:", min_value=100,
-                                        max_value=2000, value=400, step=200)
-    return model, model_type, temp, max_tokens
-def messages_to_gemini(messages):
-    gemini_messages = []
-    prev_role = None
-    for message in messages:
-        if prev_role and (prev_role == message["role"]):
-            gemini_message = gemini_messages[-1]
-        else:
-            gemini_message = {
-                "role": "model" if message["role"] == "assistant" else "user",
-                "parts": [],
-            }
-        for content in message["content"]:
-            if content["type"] == "text":
-                gemini_message["parts"].append(content["text"])
-            elif content["type"] == "image_url":
-                gemini_message["parts"].append(base64_to_image(content["image_url"]["url"]))
-            elif content["type"] == "video_file":
-                gemini_message["parts"].append(genai.upload_file(content["video_file"]))
-            elif content["type"] == "audio_file":
-                gemini_message["parts"].append(genai.upload_file(content["audio_file"]))
-        if prev_role != message["role"]:
-            gemini_messages.append(gemini_message)
-        prev_role = message["role"]
-    return gemini_messages
-# Function to convert file to base64
-def get_image_base64(image_raw):
-    buffered = BytesIO()
-    image_raw.save(buffered, format=image_raw.format)
-    img_byte = buffered.getvalue()
-    return base64.b64encode(img_byte).decode('utf-8')
-def add_media_files_to_messages():
-    if st.session_state.uploaded_file:
-        file_type = st.session_state.uploaded_file.type
-        file_content = st.session_state.uploaded_file.getvalue()
-        if file_type.startswith("image"):
-            img = base64.b64encode(file_content).decode()
-            st.session_state.messages.append(
-                {
-                    "role": "user",
-                    "content": [{
-                        "type": "image_url",
-                        "image_url": {"url": f"data:{file_type};base64,{img}"}
-                    }]
-                }
-            )
-        elif file_type == "video/mp4":
-            video_base64 = base64.b64encode(file_content).decode()
-            st.session_state.messages.append(
-                {
-                    "role": "user",
-                    "content": [{
-                        "type": "video_file",
-                        "video_file": f"data:{file_type};base64,{video_base64}",
-                    }]
-                }
-            )
-        elif file_type.startswith("audio"):
-            audio_base64 = base64.b64encode(file_content).decode()
-            st.session_state.messages.append(
-                {
-                    "role": "user",
-                    "content": [{
-                        "type": "audio_file",
-                        "audio_file": f"data:{file_type};base64,{audio_base64}",
-                    }]
-                }
-            )
-def add_camera_img_to_messages():
-    if "camera_img" in st.session_state and st.session_state.camera_img:
-        img = base64.b64encode(st.session_state.camera_img.getvalue()).decode()
-        st.session_state.messages.append(
-            {
-                "role": "user",
-                "content": [{
-                    "type": "image_url",
-                    "image_url": {"url": f"data:image/jpeg;base64,{img}"}
-                }]
-            }
-        )
-with st.sidebar:
-    st.logo("logo.png")
-    api_cols = st.columns(2)
-    with api_cols[0]:
-        default_groq_api_key = os.getenv("GROQ_API_KEY") if os.getenv("GROQ_API_KEY") is not None else ""  # only for development environment, otherwise it should return None
-        with st.popover("🔐 Groq", use_container_width=True):
-            groq_api_key = st.text_input("Get your Groq API Key (https://console.groq.com/keys)", value=default_groq_api_key, type="password")
-    with api_cols[1]:
-        default_google_api_key = os.getenv("GOOGLE_API_KEY") if os.getenv("GOOGLE_API_KEY") is not None else ""  # only for development environment, otherwise it should return None
-        with st.popover("🔐 Google", use_container_width=True):
-            google_api_key = st.text_input("Get your Google API Key (https://aistudio.google.com/app/apikey)", value=default_google_api_key, type="password")
-if (groq_api_key == "" or groq_api_key is None or "gsk" not in groq_api_key) and (google_api_key == "" or google_api_key is None or "AIza" not in google_api_key):
-    st.warning("Please Add an API Key to proceed.")
-else:
-    col1, col2 = st.columns([1,6])
-    with col1:
-        audio_bytes = audio_recorder("Speak",
-                                     neutral_color="#728796",
-                                     recording_color="#f81f6f",
-                                     icon_name="microphone-lines",
-                                     icon_size="3x")
-    if "messages" not in st.session_state:
-        st.session_state.messages = []
-    # Handle speech input
-    if "prev_speech_hash" not in st.session_state:
-        st.session_state.prev_speech_hash = None
-    if audio_bytes and st.session_state.prev_speech_hash != hash(audio_bytes):
-        st.session_state.prev_speech_hash = hash(audio_bytes)
-        speech_base64 = base64.b64encode(audio_bytes).decode()
-        st.session_state.messages.append(
-            {
-                "role": "user",
-                "content": [{
-                    "type": "speech_input",
-                    "speech_input": f"data:audio/wav;base64,{speech_base64}",
-                }]
-            }
-        )
-    for message in st.session_state.messages:
-        with col2:
-            with st.chat_message(message["role"]):
-                for content in message["content"]:
-                    if content["type"] == "text":
-                        st.markdown(content["text"])
-                    elif content["type"] == "image_url":
-                        st.image(content["image_url"]["url"], use_column_width=True)
-                    elif content["type"] == "video_file":
-                        st.video(content["video_file"])
-                    elif content["type"] == "audio_file":
-                        st.audio(content["audio_file"], autoplay=True)
-                    elif content["type"] == "speech_input":
-                        st.audio(content["speech_input"])
-    with st.sidebar:
-        st.divider()
-        columns = st.columns(2)
-        # animation
-        with columns[0]:
-            lottie_animation = load_lottie_file("animation.json")
-            if lottie_animation:
-                st_lottie(lottie_animation, height=100, width=100, quality="high", key="lottie_anim")
-        with columns[1]:
-            if st.toggle("Voice Response"):
-                response_lang = st.selectbox("Available Voices:", options=["Alex","Ana","Daniel"], key="voice_response")
-        available_models = []  + (google_models if google_api_key else []) + (groq_models if groq_api_key else [])
-        model, model_type, temperature, max_tokens = get_llm_info(available_models)
-        st.divider()
-        if model_type == "google":
-            st.write("Upload a file or take a picture")
-            media_cols = st.columns(2)
-            with media_cols[0]:
-                with st.popover("📁 Upload", use_container_width=True):
-                    st.file_uploader(
-                        "Upload an image, audio or a video",
-                        type=["png", "jpg", "jpeg", "wav", "mp3", "mp4"],
-                        accept_multiple_files=False,
-                        key="uploaded_file",
-                        on_change=add_media_files_to_messages,
-                    )
-            with media_cols[1]:
-                with st.popover("📷 Camera", use_container_width=True):
-                    activate_camera = st.checkbox("Activate camera")
-                    if activate_camera:
-                        st.camera_input(
-                            "Take a picture",
-                            key="camera_img",
-                            on_change=add_camera_img_to_messages,
-                        )
-        else:
-            pass
-    # temperature = vertical_slider(
-    #     label = "Temperature",  #Optional
-    #     key = "vert_01" ,
-    #     height = 100, #Optional - Defaults to 300#Optional - Defaults to "circle"
-    #     step = 1, #Optional - Defaults to 1
-    #     default_value=5,#Optional - Defaults to 0
-    #     min_value= 0, # Defaults to 0
-    #     max_value= 10, # Defaults to 10
-    #     track_color = "blue",
-    #     thumb_shape="square", #Optional - Defaults to #D3D3D3
-    #     slider_color = 'lighgray', #Optional - Defaults to #29B5E8
-    #     thumb_color= "orange", #Optional - Defaults to #11567f
-    #     value_always_visible = False ,#Optional - Defaults to False
-    # )
-if prompt:= st.chat_input("Type you question", key="question"):
-    with col2:
-        st.session_state.messages.append(
-                    {
-                        "role": "user",
-                        "content": [{
-                            "type": "text",
-                            "text": prompt,
-                        }]
-                    }
-                )
-        st.chat_message("user").markdown(prompt)
-# Confirmation popup window
-# selection_dict = {"file_and_answer": "", "prompt": "", "respuesta_chat": ""}
-# st.button("Visualize", on_click=visualize_display_page, key="visualiza", args=[selection_dict])

test2.py DELETED Viewed

@@ -1,314 +0,0 @@
-import streamlit as st
-from audio_recorder_streamlit import audio_recorder
-from PIL import Image
-from io import BytesIO
-import base64
-from utils import set_safety_settings, google_models, groq_models, get_llm_info
-import google.generativeai as genai
-import os
-from dotenv import load_dotenv
-load_dotenv()
-st.title("Super AI Assistant")
-###--- Function to convert base64 to temp file ---###
-def base64_to_temp_file(base64_string, file_extension):
-    base64_string = base64_string.split(",")[1]
-    file_bytes = BytesIO(base64.b64decode(base64_string))
-    temp_file_path = f"temp_file.{file_extension}"
-    with open(temp_file_path, "wb") as temp_file:
-        temp_file.write(file_bytes.read())
-    return temp_file_path
-###--- Function for preparing messages for Gemini---###
-def messages_to_gemini(messages):
-    gemini_messages = []
-    prev_role = None
-    for message in messages:
-        if prev_role and (prev_role == message["role"]):
-            gemini_message = gemini_messages[-1]
-        else:
-            gemini_message = {
-                "role": "model" if message["role"] == "assistant" else "user",
-                "parts": [],
-            }
-        for content in message["content"]:
-            if content["type"] == "text":
-                gemini_message["parts"].append(content["text"])
-            elif content["type"] == "image_url":
-                gemini_message["parts"].append(base64_to_image(content["image_url"]["url"]))
-            elif content["type"] == "video_file":
-                video_file_path = base64_to_temp_file(content["video_file"], "mp4")
-                with st.spinner("Sending video file to Gemini..."):
-                    gemini_message["parts"].append(genai.upload_file(path=video_file_path))
-                os.remove(video_file_path)
-            elif content["type"] == "audio_file":
-                audio_file_path = base64_to_temp_file(content["audio_file"], "wav")
-                with st.spinner("Sending audio file to Gemini..."):
-                    gemini_message["parts"].append(genai.upload_file(path=audio_file_path))
-                os.remove(audio_file_path)
-            elif content["type"] == "speech_input":
-                speech_file_path = base64_to_temp_file(content["speech_input"], "wav")
-                with st.spinner("Sending audio file to Gemini..."):
-                    gemini_message["parts"].append(genai.upload_file(path=speech_file_path))
-                os.remove(speech_file_path)
-        if prev_role != message["role"]:
-            gemini_messages.append(gemini_message)
-        prev_role = message["role"]
-    return gemini_messages
-##-- Converting base64 to image ---##
-def base64_to_image(base64_string):
-    base64_string = base64_string.split(",")[1]
-    return Image.open(BytesIO(base64.b64decode(base64_string)))
-##--- Function for adding media files to session_state messages ---###
-def add_media_files_to_messages():
-    if st.session_state.uploaded_file:
-        file_type = st.session_state.uploaded_file.type
-        file_content = st.session_state.uploaded_file.getvalue()
-        if file_type.startswith("image"):
-            img = base64.b64encode(file_content).decode()
-            st.session_state.messages.append(
-                {
-                    "role": "user",
-                    "content": [{
-                        "type": "image_url",
-                        "image_url": {"url": f"data:{file_type};base64,{img}"}
-                    }]
-                }
-            )
-        elif file_type == "video/mp4":
-            video_base64 = base64.b64encode(file_content).decode()
-            st.session_state.messages.append(
-                {
-                    "role": "user",
-                    "content": [{
-                        "type": "video_file",
-                        "video_file": f"data:{file_type};base64,{video_base64}",
-                    }]
-                }
-            )
-        elif file_type.startswith("audio"):
-            audio_base64 = base64.b64encode(file_content).decode()
-            st.session_state.messages.append(
-                {
-                    "role": "user",
-                    "content": [{
-                        "type": "audio_file",
-                        "audio_file": f"data:{file_type};base64,{audio_base64}",
-                    }]
-                }
-            )
-###--- FUNCTION TO ADD CAMERA IMAGE TO MESSAGES ---##
-def add_camera_img_to_messages():
-    if "camera_img" in st.session_state and st.session_state.camera_img:
-        img = base64.b64encode(st.session_state.camera_img.getvalue()).decode()
-        st.session_state.messages.append(
-            {
-                "role": "user",
-                "content": [{
-                    "type": "image_url",
-                    "image_url": {"url": f"data:image/jpeg;base64,{img}"}
-                }]
-            }
-        )
-##--- FUNCTION TO RESET CONVERSATION ---##
-def reset_conversation():
-    if "messages" in st.session_state and len(st.session_state.messages) > 0:
-        st.session_state.pop("messages", None)
-    for file in genai.list_files():
-        genai.delete_file(file.name)
-##--- FUNCTION TO STREAM LLM RESPONSE ---##
-def stream_llm_response(model_params, model_type="google", api_key=None):
-    response_message = ""
-    if model_type == "google":
-        genai.configure(api_key=api_key)
-        model = genai.GenerativeModel(
-                model_name = model_params["model"],
-                generation_config={
-                    "temperature": model_params["temperature"],
-                    "max_output_tokens": model_params["max_tokens"],
-                },
-                safety_settings=set_safety_settings(),
-                system_instruction="""You are a helpful assistant who asnwers user's questions professionally and politely."""
-            )
-        gemini_messages = messages_to_gemini(st.session_state.messages)
-        for chunk in model.generate_content(contents=gemini_messages, stream=True,):
-            chunk_text = chunk.text or ""
-            response_message += chunk_text
-            yield chunk_text
-    st.session_state.messages.append({
-    "role": "assistant",
-    "content": [
-        {
-            "type": "text",
-            "text": response_message,
-        }
-    ]})
-##--- API KEYS ---##
-with st.sidebar:
-    st.logo("logo.png")
-    api_cols = st.columns(2)
-    with api_cols[0]:
-        with st.popover("🔐 Groq", use_container_width=True):
-            groq_api_key = st.text_input("Get your Groq API Key (https://console.groq.com/keys)", type="password")
-    with api_cols[1]:
-        with st.popover("🔐 Google", use_container_width=True):
-            google_api_key = st.text_input("Get your Google API Key (https://aistudio.google.com/app/apikey)", type="password")
-##--- API KEY CHECK ---##
-if (groq_api_key == "" or groq_api_key is None or "gsk" not in groq_api_key) and (google_api_key == "" or google_api_key is None or "AIza" not in google_api_key):
-    st.warning("Please Add an API Key to proceed.")
-####--- LLM SIDEBAR ---###
-else:
-    with st.sidebar:
-        available_models = []  + (google_models if google_api_key else []) + (groq_models if groq_api_key else [])
-        model, model_type, temperature, max_tokens = get_llm_info(available_models)
-        model_params = {
-                "model": model,
-                "temperature": temperature,
-                "max_tokens": max_tokens
-            }
-        st.divider()
-        ###---- Google Gemini Sidebar Customization----###
-        if model_type == "google":
-            st.write("Upload a file or take a picture")
-            media_cols = st.columns(2)
-            with media_cols[0]:
-                with st.popover("📁 Upload", use_container_width=True):
-                    st.file_uploader(
-                        "Upload an image, audio or a video",
-                        type=["png", "jpg", "jpeg", "wav", "mp3", "mp4"],
-                        accept_multiple_files=False,
-                        key="uploaded_file",
-                        on_change=add_media_files_to_messages,
-                    )
-            with media_cols[1]:
-                with st.popover("📷 Camera", use_container_width=True):
-                    activate_camera = st.checkbox("Activate camera")
-                    if activate_camera:
-                        st.camera_input(
-                            "Take a picture",
-                            key="camera_img",
-                            on_change=add_camera_img_to_messages,
-                        )
-        ###---- Groq Models Sidebar Customization----###
-        else:
-            pass  # will add later
-######-----  Main Interface -----#######
-    chat_col1, chat_col2 = st.columns([1,6])
-    with chat_col1:
-        ###--- Audio Recording ---###
-        audio_bytes = audio_recorder("Speak",
-                                     neutral_color="#f5f8fc",
-                                     recording_color="#f81f6f",
-                                     icon_name="microphone-lines",
-                                     icon_size="3x")
-        ###--- Reset Conversation ---###
-        st.button(
-                "🗑️ Reset",
-                use_container_width=True,
-                on_click=reset_conversation,
-                help="If clicked, conversation will be reset.",
-            )
-    if "messages" not in st.session_state:
-        st.session_state.messages = []
-    # Handle speech input
-    if "prev_speech_hash" not in st.session_state:
-        st.session_state.prev_speech_hash = None
-    if audio_bytes and st.session_state.prev_speech_hash != hash(audio_bytes):
-        st.session_state.prev_speech_hash = hash(audio_bytes)
-        speech_base64 = base64.b64encode(audio_bytes).decode()
-        st.session_state.messages.append(
-            {
-                "role": "user",
-                "content": [{
-                    "type": "speech_input",
-                    "speech_input": f"data:audio/wav;base64,{speech_base64}",
-                }]
-            }
-        )
-    with chat_col2:
-        message_container = st.container(height=380, border=False)
-        for message in st.session_state.messages:
-            avatar = "assistant.png" if message["role"] == "assistant" else "user.png"
-            with message_container.chat_message(message["role"], avatar=avatar):
-                for content in message["content"]:
-                    if content["type"] == "text":
-                        st.markdown(content["text"])
-                    elif content["type"] == "image_url":
-                        st.image(content["image_url"]["url"])
-                    elif content["type"] == "video_file":
-                        st.video(content["video_file"])
-                    elif content["type"] == "audio_file":
-                        st.audio(content["audio_file"], autoplay=True)
-                    elif content["type"] == "speech_input":
-                        st.audio(content["speech_input"])
-    ###----- User Question -----###
-    if prompt:= st.chat_input("Type you question", key="question"):
-        message_container.chat_message("user", avatar="user.png").markdown(prompt)
-        st.session_state.messages.append(
-                    {
-                        "role": "user",
-                        "content": [{
-                            "type": "text",
-                            "text": prompt,
-                        }]
-                    }
-                )
-        ###----- Generate response -----###
-        with message_container.chat_message("assistant", avatar="assistant.png"):
-            model2key = {
-                        "openai": groq_api_key,
-                        "google": google_api_key,
-                    }
-            st.write_stream(stream_llm_response(
-                        model_params=model_params,
-                        model_type=model_type,
-                        api_key=model2key[model_type]
-                    )
-                )

test4.py CHANGED Viewed

@@ -7,8 +7,7 @@ from streamlit_lottie import st_lottie
 import json
 from utils import set_safety_settings, about, extract_all_pages_as_images
 import google.generativeai as genai
-from google.generativeai.types import SafetyRatingDict
-import os, random
 import tempfile
 import asyncio
 import edge_tts
@@ -99,6 +98,7 @@ def base64_to_temp_file(base64_string, unique_name, file_extension):
     temp_file_path = f"{unique_name}.{file_extension}"
     with open(temp_file_path, "wb") as temp_file:
         temp_file.write(file_bytes.read())
     return temp_file_path
@@ -123,13 +123,22 @@ def messages_to_gemini(messages):
             elif content["type"] == "image_url":
                 gemini_message["parts"].append(base64_to_image(content["image_url"]["url"]))
-            elif content["type"] in ["video_file", "audio_file"]:
                 file_name = content['unique_name']
                 if file_name not in uploaded_files:
-                    temp_file_path = base64_to_temp_file(content[content["type"]], file_name, "mp4" if content["type"] == "video_file" else "wav")
-                    with st.spinner(f"Sending {content['type'].replace('_', ' ')} to Gemini..."):
                         gemini_message["parts"].append(genai.upload_file(path=temp_file_path))
                     os.remove(temp_file_path)
@@ -182,7 +191,10 @@ def add_pdf_file_to_messages():
             }
         )
 ##--- Function for adding media files to session_state messages ---###
 def add_media_files_to_messages():
     if st.session_state.uploaded_file:
@@ -201,15 +213,17 @@ def add_media_files_to_messages():
                 }
             )
         elif file_type == "video/mp4":
-            video_base64 = base64.b64encode(file_content).decode()
-            unique_id = random.randint(1000, 9999)
             st.session_state.messages.append(
                 {
                     "role": "user",
                     "content": [{
                         "type": "video_file",
-                        "video_file": f"data:{file_type};base64,{video_base64}",
-                        "unique_name": f"temp_{unique_id}"
                     }]
                 }
             )
@@ -293,15 +307,15 @@ with st.sidebar:
     api_cols = st.columns(2)
     with api_cols[0]:
         with st.popover("🔐 Groq", use_container_width=True):
-            groq_api_key = st.text_input("Click [here](https://console.groq.com/keys) to get your Groq API key", value=os.getenv("GROQ_API_KEY") , type="password")
     with api_cols[1]:
         with st.popover("🔐 Google", use_container_width=True):
-            google_api_key = st.text_input("Click [here](https://aistudio.google.com/app/apikey) to get your Google API key", value=os.getenv("GOOGLE_API_KEY") , type="password")
 ##--- API KEY CHECK ---##
 if (groq_api_key == "" or groq_api_key is None or "gsk" not in groq_api_key) and (google_api_key == "" or google_api_key is None or "AIza" not in google_api_key):
-    st.warning("Please Add an API Key to proceed.")
 ####--- LLM SIDEBAR ---###
 else:
@@ -487,4 +501,4 @@ else:
                         os.unlink(temp_file_path)  # Clean up the temporary audio file
             except genai.types.generation_types.BlockedPromptException as e:
-                st.error(f"An error occurred: {e}", icon="❌")

 import json
 from utils import set_safety_settings, about, extract_all_pages_as_images
 import google.generativeai as genai
+import os, random, time
 import tempfile
 import asyncio
 import edge_tts
     temp_file_path = f"{unique_name}.{file_extension}"
     with open(temp_file_path, "wb") as temp_file:
         temp_file.write(file_bytes.read())
+        time.sleep(1)
     return temp_file_path
             elif content["type"] == "image_url":
                 gemini_message["parts"].append(base64_to_image(content["image_url"]["url"]))
+            elif content["type"] == "video_file":
+                file_path = content["video_file"]
+                if file_path.split(".")[0] not in uploaded_files:
+                    with st.spinner(f"Sending video to Gemini..."):
+                        try:
+                            file = genai.upload_file(path=file_path)
+                            gemini_message["parts"].append(file)
+                        except Exception as e:
+                            st.error(f"An error occurred {e}")
+            elif content["type"] == "audio_file":
                 file_name = content['unique_name']
                 if file_name not in uploaded_files:
+                    temp_file_path = base64_to_temp_file(content["audio_file"], file_name, "wav")
+                    with st.spinner(f"Sending audio file to Gemini..."):
                         gemini_message["parts"].append(genai.upload_file(path=temp_file_path))
                     os.remove(temp_file_path)
             }
         )
+def save_uploaded_video(video_file, file_path):
+    with open(file_path, "wb") as f:
+        f.write(video_file.read())
 ##--- Function for adding media files to session_state messages ---###
 def add_media_files_to_messages():
     if st.session_state.uploaded_file:
                 }
             )
         elif file_type == "video/mp4":
+            file_name = st.session_state.uploaded_file.name
+            file_path = os.path.join(tempfile.gettempdir(), file_name)
+            save_uploaded_video(st.session_state.uploaded_file, file_path)
             st.session_state.messages.append(
                 {
                     "role": "user",
                     "content": [{
                         "type": "video_file",
+                        "video_file": file_path,
+                        "unique_name": file_name
                     }]
                 }
             )
     api_cols = st.columns(2)
     with api_cols[0]:
         with st.popover("🔐 Groq", use_container_width=True):
+            groq_api_key = st.text_input("Click [here](https://console.groq.com/keys) to get your Groq API key", type="password")
     with api_cols[1]:
         with st.popover("🔐 Google", use_container_width=True):
+            google_api_key = st.text_input("Click [here](https://aistudio.google.com/app/apikey) to get your Google API key", type="password")
 ##--- API KEY CHECK ---##
 if (groq_api_key == "" or groq_api_key is None or "gsk" not in groq_api_key) and (google_api_key == "" or google_api_key is None or "AIza" not in google_api_key):
+    st.info("Please enter an API key in the sidebar to proceed.")
 ####--- LLM SIDEBAR ---###
 else:
                         os.unlink(temp_file_path)  # Clean up the temporary audio file
             except genai.types.generation_types.BlockedPromptException as e:
+                st.error(f"An error occurred: {e}", icon="❌")

tts.py DELETED Viewed

@@ -1,36 +0,0 @@
-import streamlit as st
-import asyncio
-import edge_tts
-import io
-import tempfile
-import os
-VOICES = ['en-US-GuyNeural','en-US-JennyNeural',"hi-IN-SwaraNeural", "en-PH-JamesNeural"]
-st.title("Text-to-Speech with Edge TTS")
-text_input = st.text_area("Enter the text you want to convert to speech:", "Hello World")
-voice_selection = st.selectbox("Select a voice:", VOICES)
-async def generate_speech(text, voice):
-    communicate = edge_tts.Communicate(text, voice)
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
-        await communicate.save(temp_file.name)
-        temp_file_path = temp_file.name
-    with open(temp_file_path, "rb") as audio_file:
-        audio_data = audio_file.read()
-    os.unlink(temp_file_path)  # Delete the temporary file
-    return audio_data
-if st.button("Generate and Play Speech"):
-    if text_input:
-        with st.spinner("Generating speech..."):
-            audio_data = asyncio.run(generate_speech(text_input, voice_selection))
-            # Play the audio
-            st.audio(audio_data, format="audio/mp3")
-            st.success("Speech generated successfully!")
-    else:
-        st.warning("Please enter some text to convert to speech.")