Spaces:

davisandshirtliff
/

ProductManualAssistant

Sleeping

App Files Files Community

Ronochieng committed on Apr 28, 2025

Commit

c6ffb8c

verified ·

1 Parent(s): 4810e8f

Update app.py

Browse files

Files changed (1) hide show

app.py +111 -307

app.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import streamlit as st
 import tempfile
 import os, sys
 import json
 import requests
 import base64
@@ -8,8 +10,7 @@ from io import BytesIO
 from requests.auth import HTTPBasicAuth
 from typing import Tuple, Dict, List, Optional
 from langchain_community.vectorstores import FAISS
-from langchain_huggingface import HuggingFaceEmbeddings
-from langchain_community.llms import LlamaCpp
 from langchain.chains import ConversationalRetrievalChain, RetrievalQA
 from langchain.memory import ConversationBufferMemory
 from langchain.prompts import PromptTemplate
@@ -23,16 +24,6 @@ from dotenv import load_dotenv, find_dotenv
 import pandas as pd
 import pickle
 import time
-import numpy as np
-from faster_whisper import WhisperModel
-import soundfile as sf
-import io
-try:
-    from TTS.api import TTS
-    TTS_AVAILABLE = True
-except ImportError:
-    TTS_AVAILABLE = False
 sys.path.append("../..")
@@ -43,10 +34,8 @@ DB_FAISS_PATH = 'vectorstore/db_faiss'
 API_USERNAME = os.getenv('API_USERNAME')
 API_PASSWORD = os.getenv('API_PASSWORD')
 BASE_URL = os.getenv('BASE_URL')
-GEMMA_MODEL_PATH = os.getenv('GEMMA_MODEL_PATH', 'stduhpf/google-gemma-3-4b-it-qat-q4_0-gguf-small')
-HF_EMBEDDINGS_MODEL = os.getenv('HF_EMBEDDINGS_MODEL', 'BAAI/bge-small-en-v1.5')
-WHISPER_MODEL_SIZE = os.getenv('WHISPER_MODEL_SIZE', 'tiny')
-TTS_MODEL = os.getenv('TTS_MODEL', 'tts_models/en/ljspeech/tacotron2-DDC')
 # Technical terms to keep in English
 TECHNICAL_TERMS = [
@@ -71,137 +60,76 @@ class ProductResponse(BaseModel):
     explanation: str = Field(..., description="Detailed explanation")
     additional_notes: Optional[str] = None
-@st.cache_resource
-def load_asr_model():
-    """Load and cache the Faster Whisper model"""
-    try:
-        model = WhisperModel(WHISPER_MODEL_SIZE, device="cpu", compute_type="int8")
-        return model
-    except Exception as e:
-        st.error(f"Error loading speech model: {str(e)}")
-        return None
-@st.cache_resource
-def load_tts_model():
-    """Load and cache the TTS model"""
-    if not TTS_AVAILABLE:
-        st.warning("TTS library not available. Install with: pip install TTS")
-        return None
-    try:
-        tts = TTS(model_name=TTS_MODEL)
-        return tts
-    except Exception as e:
-        st.error(f"Error loading TTS model: {str(e)}")
-        return None
-def process_audio_with_local_models(audio_bytes: bytes, target_language: str, proficiency_level: str) -> Tuple[str, str, bytes]:
-    """Process audio using Faster Whisper for speech recognition"""
-    # Convert audio bytes to format compatible with soundfile
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as temp_audio:
         temp_audio.write(audio_bytes)
         temp_audio.flush()
-        # Load the ASR model
-        asr_model = load_asr_model()
-        if asr_model is None:
-            return "Could not process audio due to ASR model loading error", "", b""
-        # Transcribe audio
-        segments, info = asr_model.transcribe(temp_audio.name, beam_size=5)
-        original_text = " ".join([segment.text for segment in segments])
-    # Get translation using Gemma model (reuse the existing LLM)
-    if target_language != "English":
-        # Initialize the Gemma LLM
-        llm = initialize_llm()
-        # Adjust complexity based on proficiency level
-        complexity_instruction = {
-            "Beginner": "Use simple language and avoid technical jargon.",
-            "Intermediate": "Use a balanced mix of technical and simplified language.",
-            "Advanced": "You can use technical language and detailed explanations."
         }
-        translation_prompt = f"""Translate the following text to {target_language}, keeping these technical terms unchanged: {', '.join(TECHNICAL_TERMS)}. {complexity_instruction[proficiency_level]}
-Original text: {original_text}
-Translation:"""
-        translated_text = llm.invoke(translation_prompt)
-    else:
-        translated_text = original_text
     # Generate translated audio
-    translated_audio = generate_speech(translated_text)
-    return original_text, translated_text, translated_audio
-def generate_speech(text: str) -> bytes:
-    """Generate speech from text using a local TTS model"""
-    tts_model = load_tts_model()
-    if tts_model:
-        try:
-            with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as temp_audio:
-                # Generate speech file
-                tts_model.tts_to_file(text=text, file_path=temp_audio.name)
-                temp_audio.flush()
-                # Read the audio file as bytes
-                with open(temp_audio.name, "rb") as f:
-                    audio_bytes = f.read()
-            return audio_bytes
-        except Exception as e:
-            st.error(f"Error generating speech: {str(e)}")
-            return b""
-    # Return empty bytes if TTS model is not available
-    return b""
-@st.cache_resource
-def initialize_llm():
-    """Initialize the Gemma 3 4B model using LlamaCpp"""
-    try:
-        import os
-        # Define model path
-        model_path = "gemma-3-4b-it-q4_0_s.gguf"
-        # Check if model exists, if not, download it
-        if not os.path.exists(model_path):
-            st.info("Model file not found. Downloading Gemma model (this may take a while)...")
-            # You can use huggingface_hub to download the model
-            from huggingface_hub import hf_hub_download
-            model_path = hf_hub_download(
-                repo_id="stduhpf/google-gemma-3-4b-it-qat-q4_0-gguf-small",
-                filename="gemma-3-4b-it-q4_0_s.gguf",
-                cache_dir="."
-            )
-            st.success(f"Model downloaded to {model_path}")
-        # Use LangChain's LlamaCpp integration
-        llm = LlamaCpp(
-            model_path=model_path,
-            temperature=0.7,
-            max_tokens=2048,
-            n_ctx=4096,
-            top_p=1,
-            verbose=False,
-        )
-        return llm
-    except Exception as e:
-        st.error(f"Error loading model: {str(e)}")
-        return None
 def initialize_chatbot(proficiency_level: str):
     """Initialize enhanced RAG system with memory, considering proficiency level"""
-    # Use HuggingFace embeddings instead of OpenAI
-    embedding_model = HuggingFaceEmbeddings(model_name=HF_EMBEDDINGS_MODEL)
     db = FAISS.load_local(DB_FAISS_PATH, embedding_model, allow_dangerous_deserialization=True)
     faiss_retriever = db.as_retriever()
@@ -229,12 +157,7 @@ def initialize_chatbot(proficiency_level: str):
         input_variables=["question", "context"]
     )
-    # Initialize the Gemma LLM
-    llm = initialize_llm()
-    if llm is None:
-        st.error("Failed to initialize the LLM. Please check the model path and try again.")
-        return None
     qa_chain = RetrievalQA.from_chain_type(
         llm=llm,
@@ -294,9 +217,6 @@ def process_text_input(user_input: str, target_language: str, proficiency_level:
     # Translate if needed
     if target_language != "English":
-        # Initialize the Gemma LLM
-        llm = initialize_llm()
         # Adjust complexity based on proficiency level
         complexity_instruction = {
             "Beginner": "Use simple language and avoid technical jargon.",
@@ -304,21 +224,36 @@ def process_text_input(user_input: str, target_language: str, proficiency_level:
             "Advanced": "You can use technical language and detailed explanations."
         }
-        translation_prompt = f"""Translate the following text to {target_language}, preserving these technical terms: {', '.join(TECHNICAL_TERMS)}. {complexity_instruction[proficiency_level]}
-Text to translate: {answer}
-Translation:"""
-        answer = llm.invoke(translation_prompt)
     # Generate audio for the answer
-    answer_audio = generate_speech(answer)
     return answer, answer_audio
 def display_chat_message(is_user: bool, message: str, audio_bytes=None, is_loading=False):
-    """Display a chat message with modern styling and avatar with theme compatibility"""
     message_class = "user-message" if is_user else "assistant-message"
     avatar_class = "user-avatar" if is_user else "assistant-avatar"
     content_class = "user-content" if is_user else "assistant-content"
@@ -364,20 +299,17 @@ def set_page_style():
     .chat-container {
         padding: 10px 0;
     }
     .user-message {
         display: flex;
         align-items: flex-start;
         margin-bottom: 24px;
     }
     .assistant-message {
         display: flex;
         align-items: flex-start;
         margin-bottom: 24px;
         flex-direction: row-reverse;
     }
     .message-avatar {
         width: 40px;
         height: 40px;
@@ -386,92 +318,41 @@ def set_page_style():
         align-items: center;
         justify-content: center;
         font-size: 18px;
-        flex-shrink: 0;
     }
-    /* Color-scheme adaptive styles */
     .user-avatar {
         background-color: var(--primary-color, #e91e63);
         margin-right: 12px;
-        color: white;
     }
     .assistant-avatar {
-        background-color: var(--secondary-color, #795548);
         margin-left: 12px;
-        color: white;
     }
     .message-content {
-        background-color: var(--background-color, rgba(128, 128, 128, 0.15));
         padding: 12px 16px;
         border-radius: 18px;
         max-width: 75%;
         color: var(--text-color, inherit);
     }
-    /* Apply custom styles for light/dark mode */
-    @media (prefers-color-scheme: dark) {
-        .message-content {
-            background-color: rgba(255, 255, 255, 0.1);
-            color: rgba(255, 255, 255, 0.9);
-        }
-        .user-avatar {
-            background-color: #e91e63;
-        }
-        .assistant-avatar {
-            background-color: #795548;
-        }
-        .typing-indicator span {
-            background-color: rgba(255, 255, 255, 0.6);
-        }
-    }
-    @media (prefers-color-scheme: light) {
-        .message-content {
-            background-color: rgba(0, 0, 0, 0.05);
-            color: rgba(0, 0, 0, 0.9);
-        }
-        .user-avatar {
-            background-color: #e91e63;
-        }
-        .assistant-avatar {
-            background-color: #795548;
-        }
-        .typing-indicator span {
-            background-color: rgba(0, 0, 0, 0.6);
-        }
-    }
     .user-content {
         border-top-left-radius: 4px;
     }
     .assistant-content {
         border-top-right-radius: 4px;
     }
     .audio-player {
         margin-top: 8px;
         width: 100%;
         border-radius: 12px;
         overflow: hidden;
     }
     .stAudio {
         width: 100% !important;
     }
     .stAudio > div {
         border-radius: 12px !important;
     }
     .title-container {
         text-align: center;
         padding: 15px;
@@ -480,7 +361,6 @@ def set_page_style():
         color: white;
         margin-bottom: 20px;
     }
     /* Improved input container with proper alignment and theme compatibility */
     .input-area {
         display: flex;
@@ -493,44 +373,23 @@ def set_page_style():
         width: 100%;
         border: 1px solid var(--input-border-color, rgba(128, 128, 128, 0.2));
     }
-    /* Adapt input fields to light/dark mode */
-    @media (prefers-color-scheme: dark) {
-        .input-area {
-            background-color: rgba(255, 255, 255, 0.1);
-            border-color: rgba(255, 255, 255, 0.2);
-        }
-    }
-    @media (prefers-color-scheme: light) {
-        .input-area {
-            background-color: rgba(0, 0, 0, 0.05);
-            border-color: rgba(0, 0, 0, 0.1);
-        }
-    }
     .input-area .stTextInput {
         flex-grow: 1;
     }
     .stTextInput>div>div>input {
         background-color: transparent !important;
         border: none !important;
         padding: 8px 0 !important;
         box-shadow: none !important;
-        color: var(--text-color, inherit) !important;
     }
     /* Remove padding and margin from the container columns */
     .input-container-col .stTextInput {
         margin-bottom: 0 !important;
     }
     .button-col div {
         display: flex;
         justify-content: flex-end;
     }
     .send-button {
         background-color: var(--primary-color, #1976d2);
         color: white;
@@ -543,28 +402,24 @@ def set_page_style():
         padding: 0 !important;
         min-height: 0 !important;
     }
     /* Loading indicator animation */
     .loading-message {
         min-width: 70px;
     }
     .typing-indicator {
         display: flex;
         align-items: center;
         justify-content: center;
     }
     .typing-indicator span {
         height: 8px;
         width: 8px;
         margin: 0 2px;
-        background-color: #9E9E9E;
         display: block;
         border-radius: 50%;
         opacity: 0.4;
     }
     .typing-indicator span:nth-of-type(1) {
         animation: typing 1s infinite;
     }
@@ -574,7 +429,6 @@ def set_page_style():
     .typing-indicator span:nth-of-type(3) {
         animation: typing 1s 0.4s infinite;
     }
     @keyframes typing {
         0% {
             transform: translateY(0px);
@@ -589,64 +443,42 @@ def set_page_style():
             opacity: 0.4;
         }
     }
     /* Align the columns properly */
     .stHorizontal .stColumn {
         padding-left: 0 !important;
         padding-right: 0 !important;
     }
-    /* Model loading indicator - theme adaptive */
-    .model-loading {
-        background-color: var(--error-bg-color, #ffebee);
-        color: var(--error-text-color, #b71c1c);
-        padding: 10px;
-        border-radius: 8px;
-        margin-bottom: 15px;
-        border-left: 4px solid var(--error-border-color, #f44336);
     }
     @media (prefers-color-scheme: dark) {
-        .model-loading {
-            background-color: rgba(244, 67, 54, 0.2);
-            color: #ef9a9a;
-            border-left-color: #ef5350;
         }
-    }
-    /* System information section */
-    .system-info {
-        background-color: var(--info-bg-color, rgba(33, 150, 243, 0.1));
-        padding: 10px;
-        border-radius: 8px;
-        margin-top: 15px;
-    }
-    @media (prefers-color-scheme: dark) {
-        .system-info {
-            background-color: rgba(33, 150, 243, 0.15);
         }
     }
     </style>
     """, unsafe_allow_html=True)
-def detect_gpu_support():
-    """Detect whether GPU acceleration is available"""
-    try:
-        # Check if we have a GPU via context creation
-        llm = LlamaCpp(
-            model_path=GEMMA_MODEL_PATH,
-            n_gpu_layers=1,
-            verbose=False
-        )
-        return True
-    except:
-        return False
 def main():
     set_page_style()
-    # System Status Section
     with st.sidebar:
         st.markdown("<h2 style='text-align: center;'>Control Panel</h2>", unsafe_allow_html=True)
@@ -657,7 +489,6 @@ def main():
             key="language_selector"
         )
         st.markdown("<p>Proficiency Level</p>", unsafe_allow_html=True)
         proficiency_level = st.radio(
             "Select your technical understanding:",
@@ -675,28 +506,6 @@ def main():
         if st.button("Clear Conversation", key="clear_button"):
             st.session_state.chat_history = []
             st.rerun()
-        # Model information
-        st.markdown("---")
-        st.markdown("<div class='system-info'>", unsafe_allow_html=True)
-        st.markdown("<h4>System Information</h4>", unsafe_allow_html=True)
-        st.markdown(f"**LLM**: Gemma 3 4B (LlamaCPP)")
-        st.markdown(f"**Embeddings**: {HF_EMBEDDINGS_MODEL.split('/')[-1]}")
-        st.markdown(f"**ASR**: faster-whisper-{WHISPER_MODEL_SIZE}")
-        # Show TTS model info if available
-        if TTS_AVAILABLE:
-            st.markdown(f"**TTS**: {TTS_MODEL.split('/')[-1]}")
-        else:
-            st.markdown("**TTS**: Not installed")
-            st.markdown("<small>Install with: pip install TTS</small>", unsafe_allow_html=True)
-        # Device information
-        gpu_available = detect_gpu_support()
-        device = "GPU" if gpu_available else "CPU"
-        st.markdown(f"**Running on**: {device}")
-        st.markdown("</div>", unsafe_allow_html=True)
     # Main content area
     st.markdown("""
@@ -714,15 +523,10 @@ def main():
         st.session_state.current_proficiency = proficiency_level
     if 'processing' not in st.session_state:
         st.session_state.processing = False
     if 'input_key' not in st.session_state:
         st.session_state.input_key = 0
-    # Check if the LLM is initialized properly
-    if st.session_state.qa_chain is None and not os.path.exists(GEMMA_MODEL_PATH):
-        st.error("⚠️ Gemma 3 model not found. Please download the model and place it in the correct location.")
-        st.info(f"Expected model path: {GEMMA_MODEL_PATH}")
-        st.stop()
     # Chat display container
     chat_container = st.container()
     with chat_container:
@@ -756,13 +560,13 @@ def main():
             )
         else:
             st.markdown("""
-                <p style="margin: 0;">📢 Record your question:</p>
             """, unsafe_allow_html=True)
             audio_bytes = audio_recorder(
                 pause_threshold=2.0,
                 sample_rate=16000,
                 text="🎤",
-                neutral_color="#1976d2",
                 recording_color="#e91e63"
             )
@@ -814,7 +618,7 @@ def main():
         # Display user message first
         with st.spinner("Processing your voice input..."):
             try:
-                original_text, translated_text, translated_audio = process_audio_with_local_models(
                     audio_bytes, target_language, proficiency_level
                 )

 import streamlit as st
+import speech_recognition as sr
 import tempfile
 import os, sys
+import openai
 import json
 import requests
 import base64
 from requests.auth import HTTPBasicAuth
 from typing import Tuple, Dict, List, Optional
 from langchain_community.vectorstores import FAISS
+from langchain_openai import OpenAIEmbeddings, ChatOpenAI
 from langchain.chains import ConversationalRetrievalChain, RetrievalQA
 from langchain.memory import ConversationBufferMemory
 from langchain.prompts import PromptTemplate
 import pandas as pd
 import pickle
 import time
 sys.path.append("../..")
 API_USERNAME = os.getenv('API_USERNAME')
 API_PASSWORD = os.getenv('API_PASSWORD')
 BASE_URL = os.getenv('BASE_URL')
+openai.api_key = os.environ["OPENAI_API_KEY"]
 # Technical terms to keep in English
 TECHNICAL_TERMS = [
     explanation: str = Field(..., description="Detailed explanation")
     additional_notes: Optional[str] = None
def process_audio_with_openai(audio_bytes: bytes, target_language: str, proficiency_level: str) -> Tuple[str, str, bytes]:
    """Transcribe recorded audio, translate the transcript, and synthesize speech.

    The work is done with three OpenAI REST calls: ``whisper-1`` for the
    transcription, ``gpt-4o-mini`` for the translation, and ``tts-1`` for the
    spoken version of the translation.

    Args:
        audio_bytes: Raw recording; it is written to a temporary ``.wav`` file
            and uploaded as-is.
        target_language: Language name inserted into the translation prompt.
        proficiency_level: Inserted (lower-cased) into the translation prompt;
            the value ``"Beginner"`` also slows the synthesized speech to 0.9x.

    Returns:
        A ``(original_text, translated_text, translated_audio)`` tuple, where
        ``translated_audio`` is the raw body of the speech endpoint response.

    Raises:
        Exception: If the transcription response has no ``"text"`` key, the
            translation response has no choices, or the speech endpoint
            answers with a non-200 status.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    auth_headers = {"Authorization": f"Bearer {api_key}"}
    json_headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}

    # Get transcription using the Whisper API instead of chat completions
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as temp_audio:
        temp_audio.write(audio_bytes)
        temp_audio.flush()
        # Open the upload handle in a context manager so it is always closed,
        # even when the request raises; the bare open() used before leaked one
        # file descriptor per voice request.
        with open(temp_audio.name, "rb") as audio_file:
            transcription_response = requests.post(
                "https://api.openai.com/v1/audio/transcriptions",
                headers=auth_headers,
                files={"file": audio_file},
                data={"model": "whisper-1"}
            )
        transcription_data = transcription_response.json()
        if "text" not in transcription_data:
            raise Exception(f"Unexpected API response: {transcription_data}")
        original_text = transcription_data["text"]

    # Get translation with technical terms preserved and appropriate for proficiency level
    translation_prompt = f"Translate to {target_language}, keeping technical terms unchanged: {', '.join(TECHNICAL_TERMS)}. "
    translation_prompt += f"Adapt the language for a {proficiency_level.lower()} level of technical understanding."
    translation_response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=json_headers,
        json={
            "model": "gpt-4o-mini",  # text-only model is sufficient for translating the transcript
            "messages": [
                {"role": "system", "content": translation_prompt},
                {"role": "user", "content": original_text}
            ]
        }
    )
    translation_data = translation_response.json()
    if "choices" not in translation_data or len(translation_data["choices"]) == 0:
        raise Exception(f"Unexpected translation API response: {translation_data}")
    translated_text = translation_data['choices'][0]['message']['content']

    # Generate translated audio
    audio_response = requests.post(
        "https://api.openai.com/v1/audio/speech",
        headers=json_headers,
        json={
            "model": "tts-1",
            "input": translated_text,
            "voice": "alloy",
            "speed": 0.9 if proficiency_level == "Beginner" else 1.0  # Slower for beginners
        }
    )
    if audio_response.status_code != 200:
        raise Exception(f"Error generating audio: {audio_response.text}")
    translated_audio = audio_response.content
    return original_text, translated_text, translated_audio
 def initialize_chatbot(proficiency_level: str):
     """Initialize enhanced RAG system with memory, considering proficiency level"""
+    embedding_model = OpenAIEmbeddings(model="text-embedding-3-large")
     db = FAISS.load_local(DB_FAISS_PATH, embedding_model, allow_dangerous_deserialization=True)
     faiss_retriever = db.as_retriever()
         input_variables=["question", "context"]
     )
+    llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0.3)
     qa_chain = RetrievalQA.from_chain_type(
         llm=llm,
     # Translate if needed
     if target_language != "English":
         # Adjust complexity based on proficiency level
         complexity_instruction = {
             "Beginner": "Use simple language and avoid technical jargon.",
             "Advanced": "You can use technical language and detailed explanations."
         }
+        translation_response = requests.post(
+            "https://api.openai.com/v1/chat/completions",
+            headers={"Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}"},
+            json={
+                "model": "gpt-4o-mini",
+                "messages": [
+                    {"role": "system", "content": f"Translate to {target_language}, preserving technical terms: {', '.join(TECHNICAL_TERMS)}. {complexity_instruction[proficiency_level]}"},
+                    {"role": "user", "content": answer}
+                ]
+            }
+        )
+        answer = translation_response.json()['choices'][0]['message']['content']
     # Generate audio for the answer
+    audio_response = requests.post(
+        "https://api.openai.com/v1/audio/speech",
+        headers={"Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}"},
+        json={
+            "model": "tts-1",
+            "input": answer,
+            "voice": "alloy",
+            "speed": 0.9 if proficiency_level == "Beginner" else 1.0  # Slower for beginners
+        }
+    )
+    answer_audio = audio_response.content
     return answer, answer_audio
 def display_chat_message(is_user: bool, message: str, audio_bytes=None, is_loading=False):
+    """Display a chat message with modern styling and avatar"""
     message_class = "user-message" if is_user else "assistant-message"
     avatar_class = "user-avatar" if is_user else "assistant-avatar"
     content_class = "user-content" if is_user else "assistant-content"
     .chat-container {
         padding: 10px 0;
     }
     .user-message {
         display: flex;
         align-items: flex-start;
         margin-bottom: 24px;
     }
     .assistant-message {
         display: flex;
         align-items: flex-start;
         margin-bottom: 24px;
         flex-direction: row-reverse;
     }
     .message-avatar {
         width: 40px;
         height: 40px;
         align-items: center;
         justify-content: center;
         font-size: 18px;
+        color: white;
     }
     .user-avatar {
         background-color: var(--primary-color, #e91e63);
         margin-right: 12px;
     }
     .assistant-avatar {
+        background-color: #795548;
         margin-left: 12px;
     }
     .message-content {
+        background-color: var(--secondary-background-color, rgba(128, 128, 128, 0.15));
         padding: 12px 16px;
         border-radius: 18px;
         max-width: 75%;
         color: var(--text-color, inherit);
     }
     .user-content {
         border-top-left-radius: 4px;
     }
     .assistant-content {
         border-top-right-radius: 4px;
     }
     .audio-player {
         margin-top: 8px;
         width: 100%;
         border-radius: 12px;
         overflow: hidden;
     }
     .stAudio {
         width: 100% !important;
     }
     .stAudio > div {
         border-radius: 12px !important;
     }
     .title-container {
         text-align: center;
         padding: 15px;
         color: white;
         margin-bottom: 20px;
     }
     /* Improved input container with proper alignment and theme compatibility */
     .input-area {
         display: flex;
         width: 100%;
         border: 1px solid var(--input-border-color, rgba(128, 128, 128, 0.2));
     }
     .input-area .stTextInput {
         flex-grow: 1;
     }
     .stTextInput>div>div>input {
         background-color: transparent !important;
         border: none !important;
         padding: 8px 0 !important;
         box-shadow: none !important;
     }
     /* Remove padding and margin from the container columns */
     .input-container-col .stTextInput {
         margin-bottom: 0 !important;
     }
     .button-col div {
         display: flex;
         justify-content: flex-end;
     }
     .send-button {
         background-color: var(--primary-color, #1976d2);
         color: white;
         padding: 0 !important;
         min-height: 0 !important;
     }
     /* Loading indicator animation */
     .loading-message {
         min-width: 70px;
     }
     .typing-indicator {
         display: flex;
         align-items: center;
         justify-content: center;
     }
     .typing-indicator span {
         height: 8px;
         width: 8px;
         margin: 0 2px;
+        background-color: var(--text-color, #9E9E9E);
         display: block;
         border-radius: 50%;
         opacity: 0.4;
     }
     .typing-indicator span:nth-of-type(1) {
         animation: typing 1s infinite;
     }
     .typing-indicator span:nth-of-type(3) {
         animation: typing 1s 0.4s infinite;
     }
     @keyframes typing {
         0% {
             transform: translateY(0px);
             opacity: 0.4;
         }
     }
     /* Align the columns properly */
     .stHorizontal .stColumn {
         padding-left: 0 !important;
         padding-right: 0 !important;
     }
+    /* Add CSS variables for theme detection */
+    :root {
+        --primary-color: #1976d2;
+        --secondary-background-color: rgba(128, 128, 128, 0.15);
+        --text-color: inherit;
+        --input-bg-color: rgba(128, 128, 128, 0.1);
+        --input-border-color: rgba(128, 128, 128, 0.2);
     }
+    /* Dark mode specific adjustments */
     @media (prefers-color-scheme: dark) {
+        :root {
+            --secondary-background-color: rgba(70, 70, 70, 0.3);
+            --input-bg-color: rgba(70, 70, 70, 0.2);
+            --input-border-color: rgba(100, 100, 100, 0.3);
         }
+        .message-content {
+            color: rgba(255, 255, 255, 0.9);
+        }
+        .stTextInput>div>div>input {
+            color: rgba(255, 255, 255, 0.9) !important;
         }
     }
     </style>
     """, unsafe_allow_html=True)
 def main():
     set_page_style()
+    # Sidebar configuration
     with st.sidebar:
         st.markdown("<h2 style='text-align: center;'>Control Panel</h2>", unsafe_allow_html=True)
             key="language_selector"
         )
         st.markdown("<p>Proficiency Level</p>", unsafe_allow_html=True)
         proficiency_level = st.radio(
             "Select your technical understanding:",
         if st.button("Clear Conversation", key="clear_button"):
             st.session_state.chat_history = []
             st.rerun()
     # Main content area
     st.markdown("""
         st.session_state.current_proficiency = proficiency_level
     if 'processing' not in st.session_state:
         st.session_state.processing = False
     if 'input_key' not in st.session_state:
         st.session_state.input_key = 0
     # Chat display container
     chat_container = st.container()
     with chat_container:
             )
         else:
             st.markdown("""
+                <p style="margin: 0; color: var(--text-color, inherit);">📢 Record your question:</p>
             """, unsafe_allow_html=True)
             audio_bytes = audio_recorder(
                 pause_threshold=2.0,
                 sample_rate=16000,
                 text="🎤",
+                neutral_color="var(--primary-color, #1976d2)",
                 recording_color="#e91e63"
             )
         # Display user message first
         with st.spinner("Processing your voice input..."):
             try:
+                original_text, translated_text, translated_audio = process_audio_with_openai(
                     audio_bytes, target_language, proficiency_level
                 )