Ronochieng committed on
Commit
c6ffb8c
·
verified ·
1 Parent(s): 4810e8f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -307
app.py CHANGED
@@ -1,6 +1,8 @@
1
  import streamlit as st
 
2
  import tempfile
3
  import os, sys
 
4
  import json
5
  import requests
6
  import base64
@@ -8,8 +10,7 @@ from io import BytesIO
8
  from requests.auth import HTTPBasicAuth
9
  from typing import Tuple, Dict, List, Optional
10
  from langchain_community.vectorstores import FAISS
11
- from langchain_huggingface import HuggingFaceEmbeddings
12
- from langchain_community.llms import LlamaCpp
13
  from langchain.chains import ConversationalRetrievalChain, RetrievalQA
14
  from langchain.memory import ConversationBufferMemory
15
  from langchain.prompts import PromptTemplate
@@ -23,16 +24,6 @@ from dotenv import load_dotenv, find_dotenv
23
  import pandas as pd
24
  import pickle
25
  import time
26
- import numpy as np
27
- from faster_whisper import WhisperModel
28
- import soundfile as sf
29
- import io
30
-
31
- try:
32
- from TTS.api import TTS
33
- TTS_AVAILABLE = True
34
- except ImportError:
35
- TTS_AVAILABLE = False
36
 
37
  sys.path.append("../..")
38
 
@@ -43,10 +34,8 @@ DB_FAISS_PATH = 'vectorstore/db_faiss'
43
  API_USERNAME = os.getenv('API_USERNAME')
44
  API_PASSWORD = os.getenv('API_PASSWORD')
45
  BASE_URL = os.getenv('BASE_URL')
46
- GEMMA_MODEL_PATH = os.getenv('GEMMA_MODEL_PATH', 'stduhpf/google-gemma-3-4b-it-qat-q4_0-gguf-small')
47
- HF_EMBEDDINGS_MODEL = os.getenv('HF_EMBEDDINGS_MODEL', 'BAAI/bge-small-en-v1.5')
48
- WHISPER_MODEL_SIZE = os.getenv('WHISPER_MODEL_SIZE', 'tiny')
49
- TTS_MODEL = os.getenv('TTS_MODEL', 'tts_models/en/ljspeech/tacotron2-DDC')
50
 
51
  # Technical terms to keep in English
52
  TECHNICAL_TERMS = [
@@ -71,137 +60,76 @@ class ProductResponse(BaseModel):
71
  explanation: str = Field(..., description="Detailed explanation")
72
  additional_notes: Optional[str] = None
73
 
74
- @st.cache_resource
75
- def load_asr_model():
76
- """Load and cache the Faster Whisper model"""
77
- try:
78
- model = WhisperModel(WHISPER_MODEL_SIZE, device="cpu", compute_type="int8")
79
- return model
80
- except Exception as e:
81
- st.error(f"Error loading speech model: {str(e)}")
82
- return None
83
-
84
- @st.cache_resource
85
- def load_tts_model():
86
- """Load and cache the TTS model"""
87
- if not TTS_AVAILABLE:
88
- st.warning("TTS library not available. Install with: pip install TTS")
89
- return None
90
 
91
- try:
92
- tts = TTS(model_name=TTS_MODEL)
93
- return tts
94
- except Exception as e:
95
- st.error(f"Error loading TTS model: {str(e)}")
96
- return None
97
-
98
- def process_audio_with_local_models(audio_bytes: bytes, target_language: str, proficiency_level: str) -> Tuple[str, str, bytes]:
99
- """Process audio using Faster Whisper for speech recognition"""
100
- # Convert audio bytes to format compatible with soundfile
101
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as temp_audio:
102
  temp_audio.write(audio_bytes)
103
  temp_audio.flush()
104
 
105
- # Load the ASR model
106
- asr_model = load_asr_model()
107
-
108
- if asr_model is None:
109
- return "Could not process audio due to ASR model loading error", "", b""
110
-
111
- # Transcribe audio
112
- segments, info = asr_model.transcribe(temp_audio.name, beam_size=5)
113
- original_text = " ".join([segment.text for segment in segments])
114
-
115
- # Get translation using Gemma model (reuse the existing LLM)
116
- if target_language != "English":
117
- # Initialize the Gemma LLM
118
- llm = initialize_llm()
119
 
120
- # Adjust complexity based on proficiency level
121
- complexity_instruction = {
122
- "Beginner": "Use simple language and avoid technical jargon.",
123
- "Intermediate": "Use a balanced mix of technical and simplified language.",
124
- "Advanced": "You can use technical language and detailed explanations."
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  }
 
 
 
 
 
126
 
127
- translation_prompt = f"""Translate the following text to {target_language}, keeping these technical terms unchanged: {', '.join(TECHNICAL_TERMS)}. {complexity_instruction[proficiency_level]}
128
-
129
- Original text: {original_text}
130
-
131
- Translation:"""
132
-
133
- translated_text = llm.invoke(translation_prompt)
134
- else:
135
- translated_text = original_text
136
 
137
  # Generate translated audio
138
- translated_audio = generate_speech(translated_text)
139
-
140
- return original_text, translated_text, translated_audio
141
-
142
- def generate_speech(text: str) -> bytes:
143
- """Generate speech from text using a local TTS model"""
144
- tts_model = load_tts_model()
145
-
146
- if tts_model:
147
- try:
148
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as temp_audio:
149
- # Generate speech file
150
- tts_model.tts_to_file(text=text, file_path=temp_audio.name)
151
- temp_audio.flush()
152
-
153
- # Read the audio file as bytes
154
- with open(temp_audio.name, "rb") as f:
155
- audio_bytes = f.read()
156
-
157
- return audio_bytes
158
- except Exception as e:
159
- st.error(f"Error generating speech: {str(e)}")
160
- return b""
161
 
162
- # Return empty bytes if TTS model is not available
163
- return b""
164
-
165
- @st.cache_resource
166
- def initialize_llm():
167
- """Initialize the Gemma 3 4B model using LlamaCpp"""
168
- try:
169
- import os
170
-
171
- # Define model path
172
- model_path = "gemma-3-4b-it-q4_0_s.gguf"
173
-
174
- # Check if model exists, if not, download it
175
- if not os.path.exists(model_path):
176
- st.info("Model file not found. Downloading Gemma model (this may take a while)...")
177
- # You can use huggingface_hub to download the model
178
- from huggingface_hub import hf_hub_download
179
-
180
- model_path = hf_hub_download(
181
- repo_id="stduhpf/google-gemma-3-4b-it-qat-q4_0-gguf-small",
182
- filename="gemma-3-4b-it-q4_0_s.gguf",
183
- cache_dir="."
184
- )
185
- st.success(f"Model downloaded to {model_path}")
186
 
187
- # Use LangChain's LlamaCpp integration
188
- llm = LlamaCpp(
189
- model_path=model_path,
190
- temperature=0.7,
191
- max_tokens=2048,
192
- n_ctx=4096,
193
- top_p=1,
194
- verbose=False,
195
- )
196
- return llm
197
- except Exception as e:
198
- st.error(f"Error loading model: {str(e)}")
199
- return None
200
 
201
  def initialize_chatbot(proficiency_level: str):
202
  """Initialize enhanced RAG system with memory, considering proficiency level"""
203
- # Use HuggingFace embeddings instead of OpenAI
204
- embedding_model = HuggingFaceEmbeddings(model_name=HF_EMBEDDINGS_MODEL)
205
  db = FAISS.load_local(DB_FAISS_PATH, embedding_model, allow_dangerous_deserialization=True)
206
  faiss_retriever = db.as_retriever()
207
 
@@ -229,12 +157,7 @@ def initialize_chatbot(proficiency_level: str):
229
  input_variables=["question", "context"]
230
  )
231
 
232
- # Initialize the Gemma LLM
233
- llm = initialize_llm()
234
-
235
- if llm is None:
236
- st.error("Failed to initialize the LLM. Please check the model path and try again.")
237
- return None
238
 
239
  qa_chain = RetrievalQA.from_chain_type(
240
  llm=llm,
@@ -294,9 +217,6 @@ def process_text_input(user_input: str, target_language: str, proficiency_level:
294
 
295
  # Translate if needed
296
  if target_language != "English":
297
- # Initialize the Gemma LLM
298
- llm = initialize_llm()
299
-
300
  # Adjust complexity based on proficiency level
301
  complexity_instruction = {
302
  "Beginner": "Use simple language and avoid technical jargon.",
@@ -304,21 +224,36 @@ def process_text_input(user_input: str, target_language: str, proficiency_level:
304
  "Advanced": "You can use technical language and detailed explanations."
305
  }
306
 
307
- translation_prompt = f"""Translate the following text to {target_language}, preserving these technical terms: {', '.join(TECHNICAL_TERMS)}. {complexity_instruction[proficiency_level]}
308
-
309
- Text to translate: {answer}
310
-
311
- Translation:"""
312
-
313
- answer = llm.invoke(translation_prompt)
 
 
 
 
 
314
 
315
  # Generate audio for the answer
316
- answer_audio = generate_speech(answer)
 
 
 
 
 
 
 
 
 
 
317
 
318
  return answer, answer_audio
319
 
320
  def display_chat_message(is_user: bool, message: str, audio_bytes=None, is_loading=False):
321
- """Display a chat message with modern styling and avatar with theme compatibility"""
322
  message_class = "user-message" if is_user else "assistant-message"
323
  avatar_class = "user-avatar" if is_user else "assistant-avatar"
324
  content_class = "user-content" if is_user else "assistant-content"
@@ -364,20 +299,17 @@ def set_page_style():
364
  .chat-container {
365
  padding: 10px 0;
366
  }
367
-
368
  .user-message {
369
  display: flex;
370
  align-items: flex-start;
371
  margin-bottom: 24px;
372
  }
373
-
374
  .assistant-message {
375
  display: flex;
376
  align-items: flex-start;
377
  margin-bottom: 24px;
378
  flex-direction: row-reverse;
379
  }
380
-
381
  .message-avatar {
382
  width: 40px;
383
  height: 40px;
@@ -386,92 +318,41 @@ def set_page_style():
386
  align-items: center;
387
  justify-content: center;
388
  font-size: 18px;
389
- flex-shrink: 0;
390
  }
391
-
392
- /* Color-scheme adaptive styles */
393
  .user-avatar {
394
  background-color: var(--primary-color, #e91e63);
395
  margin-right: 12px;
396
- color: white;
397
  }
398
-
399
  .assistant-avatar {
400
- background-color: var(--secondary-color, #795548);
401
  margin-left: 12px;
402
- color: white;
403
  }
404
-
405
  .message-content {
406
- background-color: var(--background-color, rgba(128, 128, 128, 0.15));
407
  padding: 12px 16px;
408
  border-radius: 18px;
409
  max-width: 75%;
410
  color: var(--text-color, inherit);
411
  }
412
-
413
- /* Apply custom styles for light/dark mode */
414
- @media (prefers-color-scheme: dark) {
415
- .message-content {
416
- background-color: rgba(255, 255, 255, 0.1);
417
- color: rgba(255, 255, 255, 0.9);
418
- }
419
-
420
- .user-avatar {
421
- background-color: #e91e63;
422
- }
423
-
424
- .assistant-avatar {
425
- background-color: #795548;
426
- }
427
-
428
- .typing-indicator span {
429
- background-color: rgba(255, 255, 255, 0.6);
430
- }
431
- }
432
-
433
- @media (prefers-color-scheme: light) {
434
- .message-content {
435
- background-color: rgba(0, 0, 0, 0.05);
436
- color: rgba(0, 0, 0, 0.9);
437
- }
438
-
439
- .user-avatar {
440
- background-color: #e91e63;
441
- }
442
-
443
- .assistant-avatar {
444
- background-color: #795548;
445
- }
446
-
447
- .typing-indicator span {
448
- background-color: rgba(0, 0, 0, 0.6);
449
- }
450
- }
451
-
452
  .user-content {
453
  border-top-left-radius: 4px;
454
  }
455
-
456
  .assistant-content {
457
  border-top-right-radius: 4px;
458
  }
459
-
460
  .audio-player {
461
  margin-top: 8px;
462
  width: 100%;
463
  border-radius: 12px;
464
  overflow: hidden;
465
  }
466
-
467
  .stAudio {
468
  width: 100% !important;
469
  }
470
-
471
  .stAudio > div {
472
  border-radius: 12px !important;
473
  }
474
-
475
  .title-container {
476
  text-align: center;
477
  padding: 15px;
@@ -480,7 +361,6 @@ def set_page_style():
480
  color: white;
481
  margin-bottom: 20px;
482
  }
483
-
484
  /* Improved input container with proper alignment and theme compatibility */
485
  .input-area {
486
  display: flex;
@@ -493,44 +373,23 @@ def set_page_style():
493
  width: 100%;
494
  border: 1px solid var(--input-border-color, rgba(128, 128, 128, 0.2));
495
  }
496
-
497
- /* Adapt input fields to light/dark mode */
498
- @media (prefers-color-scheme: dark) {
499
- .input-area {
500
- background-color: rgba(255, 255, 255, 0.1);
501
- border-color: rgba(255, 255, 255, 0.2);
502
- }
503
- }
504
-
505
- @media (prefers-color-scheme: light) {
506
- .input-area {
507
- background-color: rgba(0, 0, 0, 0.05);
508
- border-color: rgba(0, 0, 0, 0.1);
509
- }
510
- }
511
-
512
  .input-area .stTextInput {
513
  flex-grow: 1;
514
  }
515
-
516
  .stTextInput>div>div>input {
517
  background-color: transparent !important;
518
  border: none !important;
519
  padding: 8px 0 !important;
520
  box-shadow: none !important;
521
- color: var(--text-color, inherit) !important;
522
  }
523
-
524
  /* Remove padding and margin from the container columns */
525
  .input-container-col .stTextInput {
526
  margin-bottom: 0 !important;
527
  }
528
-
529
  .button-col div {
530
  display: flex;
531
  justify-content: flex-end;
532
  }
533
-
534
  .send-button {
535
  background-color: var(--primary-color, #1976d2);
536
  color: white;
@@ -543,28 +402,24 @@ def set_page_style():
543
  padding: 0 !important;
544
  min-height: 0 !important;
545
  }
546
-
547
  /* Loading indicator animation */
548
  .loading-message {
549
  min-width: 70px;
550
  }
551
-
552
  .typing-indicator {
553
  display: flex;
554
  align-items: center;
555
  justify-content: center;
556
  }
557
-
558
  .typing-indicator span {
559
  height: 8px;
560
  width: 8px;
561
  margin: 0 2px;
562
- background-color: #9E9E9E;
563
  display: block;
564
  border-radius: 50%;
565
  opacity: 0.4;
566
  }
567
-
568
  .typing-indicator span:nth-of-type(1) {
569
  animation: typing 1s infinite;
570
  }
@@ -574,7 +429,6 @@ def set_page_style():
574
  .typing-indicator span:nth-of-type(3) {
575
  animation: typing 1s 0.4s infinite;
576
  }
577
-
578
  @keyframes typing {
579
  0% {
580
  transform: translateY(0px);
@@ -589,64 +443,42 @@ def set_page_style():
589
  opacity: 0.4;
590
  }
591
  }
592
-
593
  /* Align the columns properly */
594
  .stHorizontal .stColumn {
595
  padding-left: 0 !important;
596
  padding-right: 0 !important;
597
  }
598
 
599
- /* Model loading indicator - theme adaptive */
600
- .model-loading {
601
- background-color: var(--error-bg-color, #ffebee);
602
- color: var(--error-text-color, #b71c1c);
603
- padding: 10px;
604
- border-radius: 8px;
605
- margin-bottom: 15px;
606
- border-left: 4px solid var(--error-border-color, #f44336);
607
  }
608
 
 
609
  @media (prefers-color-scheme: dark) {
610
- .model-loading {
611
- background-color: rgba(244, 67, 54, 0.2);
612
- color: #ef9a9a;
613
- border-left-color: #ef5350;
614
  }
615
- }
616
-
617
- /* System information section */
618
- .system-info {
619
- background-color: var(--info-bg-color, rgba(33, 150, 243, 0.1));
620
- padding: 10px;
621
- border-radius: 8px;
622
- margin-top: 15px;
623
- }
624
-
625
- @media (prefers-color-scheme: dark) {
626
- .system-info {
627
- background-color: rgba(33, 150, 243, 0.15);
628
  }
629
  }
630
  </style>
631
  """, unsafe_allow_html=True)
632
 
633
- def detect_gpu_support():
634
- """Detect whether GPU acceleration is available"""
635
- try:
636
- # Check if we have a GPU via context creation
637
- llm = LlamaCpp(
638
- model_path=GEMMA_MODEL_PATH,
639
- n_gpu_layers=1,
640
- verbose=False
641
- )
642
- return True
643
- except:
644
- return False
645
-
646
  def main():
647
  set_page_style()
648
 
649
- # System Status Section
650
  with st.sidebar:
651
  st.markdown("<h2 style='text-align: center;'>Control Panel</h2>", unsafe_allow_html=True)
652
 
@@ -657,7 +489,6 @@ def main():
657
  key="language_selector"
658
  )
659
 
660
-
661
  st.markdown("<p>Proficiency Level</p>", unsafe_allow_html=True)
662
  proficiency_level = st.radio(
663
  "Select your technical understanding:",
@@ -675,28 +506,6 @@ def main():
675
  if st.button("Clear Conversation", key="clear_button"):
676
  st.session_state.chat_history = []
677
  st.rerun()
678
-
679
- # Model information
680
- st.markdown("---")
681
- st.markdown("<div class='system-info'>", unsafe_allow_html=True)
682
- st.markdown("<h4>System Information</h4>", unsafe_allow_html=True)
683
- st.markdown(f"**LLM**: Gemma 3 4B (LlamaCPP)")
684
- st.markdown(f"**Embeddings**: {HF_EMBEDDINGS_MODEL.split('/')[-1]}")
685
- st.markdown(f"**ASR**: faster-whisper-{WHISPER_MODEL_SIZE}")
686
-
687
- # Show TTS model info if available
688
- if TTS_AVAILABLE:
689
- st.markdown(f"**TTS**: {TTS_MODEL.split('/')[-1]}")
690
- else:
691
- st.markdown("**TTS**: Not installed")
692
- st.markdown("<small>Install with: pip install TTS</small>", unsafe_allow_html=True)
693
-
694
- # Device information
695
- gpu_available = detect_gpu_support()
696
- device = "GPU" if gpu_available else "CPU"
697
- st.markdown(f"**Running on**: {device}")
698
-
699
- st.markdown("</div>", unsafe_allow_html=True)
700
 
701
  # Main content area
702
  st.markdown("""
@@ -714,15 +523,10 @@ def main():
714
  st.session_state.current_proficiency = proficiency_level
715
  if 'processing' not in st.session_state:
716
  st.session_state.processing = False
 
717
  if 'input_key' not in st.session_state:
718
  st.session_state.input_key = 0
719
 
720
- # Check if the LLM is initialized properly
721
- if st.session_state.qa_chain is None and not os.path.exists(GEMMA_MODEL_PATH):
722
- st.error("⚠️ Gemma 3 model not found. Please download the model and place it in the correct location.")
723
- st.info(f"Expected model path: {GEMMA_MODEL_PATH}")
724
- st.stop()
725
-
726
  # Chat display container
727
  chat_container = st.container()
728
  with chat_container:
@@ -756,13 +560,13 @@ def main():
756
  )
757
  else:
758
  st.markdown("""
759
- <p style="margin: 0;">📢 Record your question:</p>
760
  """, unsafe_allow_html=True)
761
  audio_bytes = audio_recorder(
762
  pause_threshold=2.0,
763
  sample_rate=16000,
764
  text="🎤",
765
- neutral_color="#1976d2",
766
  recording_color="#e91e63"
767
  )
768
 
@@ -814,7 +618,7 @@ def main():
814
  # Display user message first
815
  with st.spinner("Processing your voice input..."):
816
  try:
817
- original_text, translated_text, translated_audio = process_audio_with_local_models(
818
  audio_bytes, target_language, proficiency_level
819
  )
820
 
 
1
  import streamlit as st
2
+ import speech_recognition as sr
3
  import tempfile
4
  import os, sys
5
+ import openai
6
  import json
7
  import requests
8
  import base64
 
10
  from requests.auth import HTTPBasicAuth
11
  from typing import Tuple, Dict, List, Optional
12
  from langchain_community.vectorstores import FAISS
13
+ from langchain_openai import OpenAIEmbeddings, ChatOpenAI
 
14
  from langchain.chains import ConversationalRetrievalChain, RetrievalQA
15
  from langchain.memory import ConversationBufferMemory
16
  from langchain.prompts import PromptTemplate
 
24
  import pandas as pd
25
  import pickle
26
  import time
 
 
 
 
 
 
 
 
 
 
27
 
28
  sys.path.append("../..")
29
 
 
34
  API_USERNAME = os.getenv('API_USERNAME')
35
  API_PASSWORD = os.getenv('API_PASSWORD')
36
  BASE_URL = os.getenv('BASE_URL')
37
+
38
+ openai.api_key = os.environ["OPENAI_API_KEY"]
 
 
39
 
40
  # Technical terms to keep in English
41
  TECHNICAL_TERMS = [
 
60
  explanation: str = Field(..., description="Detailed explanation")
61
  additional_notes: Optional[str] = None
62
 
63
def process_audio_with_openai(audio_bytes: bytes, target_language: str, proficiency_level: str) -> Tuple[str, str, bytes]:
    """Transcribe recorded speech, translate it, and synthesise the translation.

    Three OpenAI REST endpoints are called in sequence: ``whisper-1`` for
    transcription, ``gpt-4o-mini`` for translation, and ``tts-1`` for
    text-to-speech. The translation request is sent for every target
    language, including English.

    Args:
        audio_bytes: Raw recorded audio, uploaded under a ``.wav`` filename.
        target_language: Language name inserted into the translation prompt.
        proficiency_level: Inserted (lower-cased) into the translation prompt;
            the exact value ``"Beginner"`` also slows speech to 0.9x.

    Returns:
        A ``(original_text, translated_text, translated_audio)`` tuple: the
        transcription, its translation, and the synthesised audio bytes.

    Raises:
        Exception: If the transcription response has no ``"text"`` field, the
            translation response has no choices, or the speech endpoint
            returns a non-200 status.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    auth_header = {"Authorization": f"Bearer {api_key}"}
    json_headers = {"Content-Type": "application/json", **auth_header}

    # Upload the in-memory bytes directly. The previous version wrote them to a
    # NamedTemporaryFile and reopened it with a bare open() that was never
    # closed (leaking a handle, and failing on Windows where an open temp file
    # cannot be reopened by name).
    transcription_response = requests.post(
        "https://api.openai.com/v1/audio/transcriptions",
        headers=auth_header,
        files={"file": ("audio.wav", audio_bytes, "audio/wav")},
        data={"model": "whisper-1"},
    )
    transcription_data = transcription_response.json()

    if "text" not in transcription_data:
        raise Exception(f"Unexpected API response: {transcription_data}")

    original_text = transcription_data["text"]

    # Get translation with technical terms preserved and appropriate for proficiency level
    translation_prompt = f"Translate to {target_language}, keeping technical terms unchanged: {', '.join(TECHNICAL_TERMS)}. "
    translation_prompt += f"Adapt the language for a {proficiency_level.lower()} level of technical understanding."

    translation_response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=json_headers,
        json={
            "model": "gpt-4o-mini",
            "messages": [
                {"role": "system", "content": translation_prompt},
                {"role": "user", "content": original_text},
            ],
        },
    )

    translation_data = translation_response.json()
    if "choices" not in translation_data or len(translation_data["choices"]) == 0:
        raise Exception(f"Unexpected translation API response: {translation_data}")

    translated_text = translation_data['choices'][0]['message']['content']

    # Generate translated audio
    audio_response = requests.post(
        "https://api.openai.com/v1/audio/speech",
        headers=json_headers,
        json={
            "model": "tts-1",
            "input": translated_text,
            "voice": "alloy",
            "speed": 0.9 if proficiency_level == "Beginner" else 1.0,  # Slower for beginners
        },
    )

    if audio_response.status_code != 200:
        raise Exception(f"Error generating audio: {audio_response.text}")

    translated_audio = audio_response.content

    return original_text, translated_text, translated_audio
 
 
 
 
 
 
 
 
 
 
129
 
130
  def initialize_chatbot(proficiency_level: str):
131
  """Initialize enhanced RAG system with memory, considering proficiency level"""
132
+ embedding_model = OpenAIEmbeddings(model="text-embedding-3-large")
 
133
  db = FAISS.load_local(DB_FAISS_PATH, embedding_model, allow_dangerous_deserialization=True)
134
  faiss_retriever = db.as_retriever()
135
 
 
157
  input_variables=["question", "context"]
158
  )
159
 
160
+ llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0.3)
 
 
 
 
 
161
 
162
  qa_chain = RetrievalQA.from_chain_type(
163
  llm=llm,
 
217
 
218
  # Translate if needed
219
  if target_language != "English":
 
 
 
220
  # Adjust complexity based on proficiency level
221
  complexity_instruction = {
222
  "Beginner": "Use simple language and avoid technical jargon.",
 
224
  "Advanced": "You can use technical language and detailed explanations."
225
  }
226
 
227
+ translation_response = requests.post(
228
+ "https://api.openai.com/v1/chat/completions",
229
+ headers={"Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}"},
230
+ json={
231
+ "model": "gpt-4o-mini",
232
+ "messages": [
233
+ {"role": "system", "content": f"Translate to {target_language}, preserving technical terms: {', '.join(TECHNICAL_TERMS)}. {complexity_instruction[proficiency_level]}"},
234
+ {"role": "user", "content": answer}
235
+ ]
236
+ }
237
+ )
238
+ answer = translation_response.json()['choices'][0]['message']['content']
239
 
240
  # Generate audio for the answer
241
+ audio_response = requests.post(
242
+ "https://api.openai.com/v1/audio/speech",
243
+ headers={"Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}"},
244
+ json={
245
+ "model": "tts-1",
246
+ "input": answer,
247
+ "voice": "alloy",
248
+ "speed": 0.9 if proficiency_level == "Beginner" else 1.0 # Slower for beginners
249
+ }
250
+ )
251
+ answer_audio = audio_response.content
252
 
253
  return answer, answer_audio
254
 
255
  def display_chat_message(is_user: bool, message: str, audio_bytes=None, is_loading=False):
256
+ """Display a chat message with modern styling and avatar"""
257
  message_class = "user-message" if is_user else "assistant-message"
258
  avatar_class = "user-avatar" if is_user else "assistant-avatar"
259
  content_class = "user-content" if is_user else "assistant-content"
 
299
  .chat-container {
300
  padding: 10px 0;
301
  }
 
302
  .user-message {
303
  display: flex;
304
  align-items: flex-start;
305
  margin-bottom: 24px;
306
  }
 
307
  .assistant-message {
308
  display: flex;
309
  align-items: flex-start;
310
  margin-bottom: 24px;
311
  flex-direction: row-reverse;
312
  }
 
313
  .message-avatar {
314
  width: 40px;
315
  height: 40px;
 
318
  align-items: center;
319
  justify-content: center;
320
  font-size: 18px;
321
+ color: white;
322
  }
 
 
323
  .user-avatar {
324
  background-color: var(--primary-color, #e91e63);
325
  margin-right: 12px;
 
326
  }
 
327
  .assistant-avatar {
328
+ background-color: #795548;
329
  margin-left: 12px;
 
330
  }
 
331
  .message-content {
332
+ background-color: var(--secondary-background-color, rgba(128, 128, 128, 0.15));
333
  padding: 12px 16px;
334
  border-radius: 18px;
335
  max-width: 75%;
336
  color: var(--text-color, inherit);
337
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
  .user-content {
339
  border-top-left-radius: 4px;
340
  }
 
341
  .assistant-content {
342
  border-top-right-radius: 4px;
343
  }
 
344
  .audio-player {
345
  margin-top: 8px;
346
  width: 100%;
347
  border-radius: 12px;
348
  overflow: hidden;
349
  }
 
350
  .stAudio {
351
  width: 100% !important;
352
  }
 
353
  .stAudio > div {
354
  border-radius: 12px !important;
355
  }
 
356
  .title-container {
357
  text-align: center;
358
  padding: 15px;
 
361
  color: white;
362
  margin-bottom: 20px;
363
  }
 
364
  /* Improved input container with proper alignment and theme compatibility */
365
  .input-area {
366
  display: flex;
 
373
  width: 100%;
374
  border: 1px solid var(--input-border-color, rgba(128, 128, 128, 0.2));
375
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
  .input-area .stTextInput {
377
  flex-grow: 1;
378
  }
 
379
  .stTextInput>div>div>input {
380
  background-color: transparent !important;
381
  border: none !important;
382
  padding: 8px 0 !important;
383
  box-shadow: none !important;
 
384
  }
 
385
  /* Remove padding and margin from the container columns */
386
  .input-container-col .stTextInput {
387
  margin-bottom: 0 !important;
388
  }
 
389
  .button-col div {
390
  display: flex;
391
  justify-content: flex-end;
392
  }
 
393
  .send-button {
394
  background-color: var(--primary-color, #1976d2);
395
  color: white;
 
402
  padding: 0 !important;
403
  min-height: 0 !important;
404
  }
 
405
  /* Loading indicator animation */
406
  .loading-message {
407
  min-width: 70px;
408
  }
 
409
  .typing-indicator {
410
  display: flex;
411
  align-items: center;
412
  justify-content: center;
413
  }
 
414
  .typing-indicator span {
415
  height: 8px;
416
  width: 8px;
417
  margin: 0 2px;
418
+ background-color: var(--text-color, #9E9E9E);
419
  display: block;
420
  border-radius: 50%;
421
  opacity: 0.4;
422
  }
 
423
  .typing-indicator span:nth-of-type(1) {
424
  animation: typing 1s infinite;
425
  }
 
429
  .typing-indicator span:nth-of-type(3) {
430
  animation: typing 1s 0.4s infinite;
431
  }
 
432
  @keyframes typing {
433
  0% {
434
  transform: translateY(0px);
 
443
  opacity: 0.4;
444
  }
445
  }
 
446
  /* Align the columns properly */
447
  .stHorizontal .stColumn {
448
  padding-left: 0 !important;
449
  padding-right: 0 !important;
450
  }
451
 
452
+ /* Add CSS variables for theme detection */
453
+ :root {
454
+ --primary-color: #1976d2;
455
+ --secondary-background-color: rgba(128, 128, 128, 0.15);
456
+ --text-color: inherit;
457
+ --input-bg-color: rgba(128, 128, 128, 0.1);
458
+ --input-border-color: rgba(128, 128, 128, 0.2);
 
459
  }
460
 
461
+ /* Dark mode specific adjustments */
462
  @media (prefers-color-scheme: dark) {
463
+ :root {
464
+ --secondary-background-color: rgba(70, 70, 70, 0.3);
465
+ --input-bg-color: rgba(70, 70, 70, 0.2);
466
+ --input-border-color: rgba(100, 100, 100, 0.3);
467
  }
468
+ .message-content {
469
+ color: rgba(255, 255, 255, 0.9);
470
+ }
471
+ .stTextInput>div>div>input {
472
+ color: rgba(255, 255, 255, 0.9) !important;
 
 
 
 
 
 
 
 
473
  }
474
  }
475
  </style>
476
  """, unsafe_allow_html=True)
477
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
  def main():
479
  set_page_style()
480
 
481
+ # Sidebar configuration
482
  with st.sidebar:
483
  st.markdown("<h2 style='text-align: center;'>Control Panel</h2>", unsafe_allow_html=True)
484
 
 
489
  key="language_selector"
490
  )
491
 
 
492
  st.markdown("<p>Proficiency Level</p>", unsafe_allow_html=True)
493
  proficiency_level = st.radio(
494
  "Select your technical understanding:",
 
506
  if st.button("Clear Conversation", key="clear_button"):
507
  st.session_state.chat_history = []
508
  st.rerun()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
509
 
510
  # Main content area
511
  st.markdown("""
 
523
  st.session_state.current_proficiency = proficiency_level
524
  if 'processing' not in st.session_state:
525
  st.session_state.processing = False
526
+
527
  if 'input_key' not in st.session_state:
528
  st.session_state.input_key = 0
529
 
 
 
 
 
 
 
530
  # Chat display container
531
  chat_container = st.container()
532
  with chat_container:
 
560
  )
561
  else:
562
  st.markdown("""
563
+ <p style="margin: 0; color: var(--text-color, inherit);">📢 Record your question:</p>
564
  """, unsafe_allow_html=True)
565
  audio_bytes = audio_recorder(
566
  pause_threshold=2.0,
567
  sample_rate=16000,
568
  text="🎤",
569
+ neutral_color="var(--primary-color, #1976d2)",
570
  recording_color="#e91e63"
571
  )
572
 
 
618
  # Display user message first
619
  with st.spinner("Processing your voice input..."):
620
  try:
621
+ original_text, translated_text, translated_audio = process_audio_with_openai(
622
  audio_bytes, target_language, proficiency_level
623
  )
624