asad9641 committed on
Commit
c2188d0
·
verified ·
1 Parent(s): ff88dbb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -21
app.py CHANGED
@@ -3,7 +3,7 @@
3
  Multi-Mode AI Assistant (Voice, PDF, Image) with colorful website-like UI
4
  - All functionality preserved
5
  - Dark theme, gradient buttons, visible text
6
- - Gradio Blocks with columns as tabs
7
  """
8
  import os
9
  import uuid
@@ -65,14 +65,31 @@ def select_relevant_chunk(question,chunks,chunk_embeds):
65
  top_idx=int(scores.argmax().item())
66
  return chunks[top_idx]
67
 
68
- def _chat_display_to_messages(chat_display):
69
- msgs=[]
70
- for user,assistant in chat_display:
71
- msgs.append({"role":"user","content":user})
72
- msgs.append({"role":"assistant","content":assistant})
73
- return msgs
74
 
75
- # ------------------ LLM & Transcription ------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  def transcribe_audio(audio_path):
77
  if not audio_path or not os.path.exists(audio_path):
78
  return "Error: audio file missing."
@@ -185,42 +202,38 @@ def download_pdf_summary(session_id):
185
  if not summary: summary="No summary available."
186
  return generate_pdf_file(summary,"summary")
187
 
188
- # ------------------ Voice ------------------
189
- def _append_chat_display(session_id,user_text,assistant_text):
190
- if session_id not in CHAT_DISPLAY: CHAT_DISPLAY[session_id]=[]
191
- CHAT_DISPLAY[session_id].append((user_text,assistant_text))
192
-
193
  def handle_voice_general(audio_file,session_id,tts_lang="en",enhancer_enabled=False,enhancer_tone="Helpful"):
194
  path=_get_path_from_gr_file(audio_file)
195
- if not path: return "No audio",None,[]
196
  user_text=transcribe_audio(path)
197
  assistant_text=generate_response(session_id,user_text,enhancer_enabled,enhancer_tone)
198
  _append_chat_display(session_id,user_text,assistant_text)
199
  audio_path=synthesize_speech(assistant_text,tts_lang)
200
- return assistant_text,audio_path,_chat_display_to_messages(CHAT_DISPLAY[session_id])
201
 
202
  def handle_voice_pdf(audio_file,session_id,tts_lang="en"):
203
  path=_get_path_from_gr_file(audio_file)
204
- if not path: return "No audio",None,[]
205
  user_text=transcribe_audio(path)
206
  assistant_text=handle_pdf_question(user_text,session_id)
207
  _append_chat_display(session_id,user_text,assistant_text)
208
  audio_path=synthesize_speech(assistant_text,tts_lang)
209
- return assistant_text,audio_path,_chat_display_to_messages(CHAT_DISPLAY[session_id])
210
 
211
  def handle_voice_image(audio_file,session_id,tts_lang="en"):
212
  path=_get_path_from_gr_file(audio_file)
213
- if not path: return "No audio",None,[]
214
  user_text=transcribe_audio(path)
215
  assistant_text=handle_image_question(user_text,session_id)
216
  _append_chat_display(session_id,user_text,assistant_text)
217
  audio_path=synthesize_speech(assistant_text,tts_lang)
218
- return assistant_text,audio_path,_chat_display_to_messages(CHAT_DISPLAY[session_id])
219
 
220
  def handle_text_general(user_text,session_id,enhancer_enabled=False,enhancer_tone="Helpful"):
221
  assistant=generate_response(session_id,user_text,enhancer_enabled,enhancer_tone)
222
  _append_chat_display(session_id,user_text,assistant)
223
- return assistant,_chat_display_to_messages(CHAT_DISPLAY[session_id])
224
 
225
  def handle_text_pdf(question,session_id): return handle_pdf_question(question,session_id)
226
  def handle_text_image(question,session_id): return handle_image_question(question,session_id)
@@ -251,7 +264,7 @@ with gr.Blocks() as demo:
251
 
252
  # --- Voice Tab ---
253
  col_voice=gr.Column(visible=True)
254
- chat_voice=gr.Chatbot(height=300)
255
  with gr.Row():
256
  mic=gr.Audio(type="filepath",label="🎤 Record Voice")
257
  audio_output=gr.Audio(type="filepath",label="Assistant Voice",interactive=False)
 
3
  Multi-Mode AI Assistant (Voice, PDF, Image) with colorful website-like UI
4
  - All functionality preserved
5
  - Dark theme, gradient buttons, visible text
6
+ - Chat bubbles for user/assistant messages
7
  """
8
  import os
9
  import uuid
 
65
  top_idx=int(scores.argmax().item())
66
  return chunks[top_idx]
67
 
68
+ def _append_chat_display(session_id,user_text,assistant_text):
69
+ if session_id not in CHAT_DISPLAY: CHAT_DISPLAY[session_id]=[]
70
+ CHAT_DISPLAY[session_id].append((user_text,assistant_text))
 
 
 
71
 
72
+ def format_chat_messages(chat_display):
73
+ """
74
+ Returns a list of tuples (html_text, None) for gr.Chatbot with colored bubbles.
75
+ """
76
+ formatted=[]
77
+ for user_msg, assistant_msg in chat_display:
78
+ user_html=f"""
79
+ <div style="background:#38bdf8;color:#0f172a;padding:8px 12px;border-radius:12px;max-width:70%;margin:5px 0;align-self:flex-end;">
80
+ {user_msg}
81
+ </div>
82
+ """
83
+ assistant_html=f"""
84
+ <div style="background:#facc15;color:#1e293b;padding:8px 12px;border-radius:12px;max-width:70%;margin:5px 0;align-self:flex-start;">
85
+ {assistant_msg}
86
+ </div>
87
+ """
88
+ formatted.append((user_html,None))
89
+ formatted.append((assistant_html,None))
90
+ return formatted
91
+
92
+ # ------------------ Transcription & LLM ------------------
93
  def transcribe_audio(audio_path):
94
  if not audio_path or not os.path.exists(audio_path):
95
  return "Error: audio file missing."
 
202
  if not summary: summary="No summary available."
203
  return generate_pdf_file(summary,"summary")
204
 
205
+ # ------------------ Voice & Text Handlers ------------------
 
 
 
 
206
  def handle_voice_general(audio_file,session_id,tts_lang="en",enhancer_enabled=False,enhancer_tone="Helpful"):
207
  path=_get_path_from_gr_file(audio_file)
208
+ if not path: return "No audio", None, []
209
  user_text=transcribe_audio(path)
210
  assistant_text=generate_response(session_id,user_text,enhancer_enabled,enhancer_tone)
211
  _append_chat_display(session_id,user_text,assistant_text)
212
  audio_path=synthesize_speech(assistant_text,tts_lang)
213
+ return assistant_text, audio_path, format_chat_messages(CHAT_DISPLAY[session_id])
214
 
215
  def handle_voice_pdf(audio_file,session_id,tts_lang="en"):
216
  path=_get_path_from_gr_file(audio_file)
217
+ if not path: return "No audio", None, []
218
  user_text=transcribe_audio(path)
219
  assistant_text=handle_pdf_question(user_text,session_id)
220
  _append_chat_display(session_id,user_text,assistant_text)
221
  audio_path=synthesize_speech(assistant_text,tts_lang)
222
+ return assistant_text, audio_path, format_chat_messages(CHAT_DISPLAY[session_id])
223
 
224
  def handle_voice_image(audio_file,session_id,tts_lang="en"):
225
  path=_get_path_from_gr_file(audio_file)
226
+ if not path: return "No audio", None, []
227
  user_text=transcribe_audio(path)
228
  assistant_text=handle_image_question(user_text,session_id)
229
  _append_chat_display(session_id,user_text,assistant_text)
230
  audio_path=synthesize_speech(assistant_text,tts_lang)
231
+ return assistant_text, audio_path, format_chat_messages(CHAT_DISPLAY[session_id])
232
 
233
  def handle_text_general(user_text,session_id,enhancer_enabled=False,enhancer_tone="Helpful"):
234
  assistant=generate_response(session_id,user_text,enhancer_enabled,enhancer_tone)
235
  _append_chat_display(session_id,user_text,assistant)
236
+ return assistant, format_chat_messages(CHAT_DISPLAY[session_id])
237
 
238
  def handle_text_pdf(question,session_id): return handle_pdf_question(question,session_id)
239
  def handle_text_image(question,session_id): return handle_image_question(question,session_id)
 
264
 
265
  # --- Voice Tab ---
266
  col_voice=gr.Column(visible=True)
267
+ chat_voice=gr.Chatbot(height=400)
268
  with gr.Row():
269
  mic=gr.Audio(type="filepath",label="🎤 Record Voice")
270
  audio_output=gr.Audio(type="filepath",label="Assistant Voice",interactive=False)