Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
Multi-Mode AI Assistant (Voice, PDF, Image) with colorful website-like UI
|
| 4 |
- All functionality preserved
|
| 5 |
- Dark theme, gradient buttons, visible text
|
| 6 |
-
-
|
| 7 |
"""
|
| 8 |
import os
|
| 9 |
import uuid
|
|
@@ -65,14 +65,31 @@ def select_relevant_chunk(question,chunks,chunk_embeds):
|
|
| 65 |
top_idx=int(scores.argmax().item())
|
| 66 |
return chunks[top_idx]
|
| 67 |
|
| 68 |
-
def
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
msgs.append({"role":"user","content":user})
|
| 72 |
-
msgs.append({"role":"assistant","content":assistant})
|
| 73 |
-
return msgs
|
| 74 |
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
def transcribe_audio(audio_path):
|
| 77 |
if not audio_path or not os.path.exists(audio_path):
|
| 78 |
return "Error: audio file missing."
|
|
@@ -185,42 +202,38 @@ def download_pdf_summary(session_id):
|
|
| 185 |
if not summary: summary="No summary available."
|
| 186 |
return generate_pdf_file(summary,"summary")
|
| 187 |
|
| 188 |
-
# ------------------ Voice ------------------
|
| 189 |
-
def _append_chat_display(session_id,user_text,assistant_text):
|
| 190 |
-
if session_id not in CHAT_DISPLAY: CHAT_DISPLAY[session_id]=[]
|
| 191 |
-
CHAT_DISPLAY[session_id].append((user_text,assistant_text))
|
| 192 |
-
|
| 193 |
def handle_voice_general(audio_file,session_id,tts_lang="en",enhancer_enabled=False,enhancer_tone="Helpful"):
|
| 194 |
path=_get_path_from_gr_file(audio_file)
|
| 195 |
-
if not path: return "No audio",None,[]
|
| 196 |
user_text=transcribe_audio(path)
|
| 197 |
assistant_text=generate_response(session_id,user_text,enhancer_enabled,enhancer_tone)
|
| 198 |
_append_chat_display(session_id,user_text,assistant_text)
|
| 199 |
audio_path=synthesize_speech(assistant_text,tts_lang)
|
| 200 |
-
return assistant_text,audio_path,
|
| 201 |
|
| 202 |
def handle_voice_pdf(audio_file,session_id,tts_lang="en"):
|
| 203 |
path=_get_path_from_gr_file(audio_file)
|
| 204 |
-
if not path: return "No audio",None,[]
|
| 205 |
user_text=transcribe_audio(path)
|
| 206 |
assistant_text=handle_pdf_question(user_text,session_id)
|
| 207 |
_append_chat_display(session_id,user_text,assistant_text)
|
| 208 |
audio_path=synthesize_speech(assistant_text,tts_lang)
|
| 209 |
-
return assistant_text,audio_path,
|
| 210 |
|
| 211 |
def handle_voice_image(audio_file,session_id,tts_lang="en"):
|
| 212 |
path=_get_path_from_gr_file(audio_file)
|
| 213 |
-
if not path: return "No audio",None,[]
|
| 214 |
user_text=transcribe_audio(path)
|
| 215 |
assistant_text=handle_image_question(user_text,session_id)
|
| 216 |
_append_chat_display(session_id,user_text,assistant_text)
|
| 217 |
audio_path=synthesize_speech(assistant_text,tts_lang)
|
| 218 |
-
return assistant_text,audio_path,
|
| 219 |
|
| 220 |
def handle_text_general(user_text,session_id,enhancer_enabled=False,enhancer_tone="Helpful"):
|
| 221 |
assistant=generate_response(session_id,user_text,enhancer_enabled,enhancer_tone)
|
| 222 |
_append_chat_display(session_id,user_text,assistant)
|
| 223 |
-
return assistant,
|
| 224 |
|
| 225 |
def handle_text_pdf(question,session_id): return handle_pdf_question(question,session_id)
|
| 226 |
def handle_text_image(question,session_id): return handle_image_question(question,session_id)
|
|
@@ -251,7 +264,7 @@ with gr.Blocks() as demo:
|
|
| 251 |
|
| 252 |
# --- Voice Tab ---
|
| 253 |
col_voice=gr.Column(visible=True)
|
| 254 |
-
chat_voice=gr.Chatbot(height=
|
| 255 |
with gr.Row():
|
| 256 |
mic=gr.Audio(type="filepath",label="🎤 Record Voice")
|
| 257 |
audio_output=gr.Audio(type="filepath",label="Assistant Voice",interactive=False)
|
|
|
|
| 3 |
Multi-Mode AI Assistant (Voice, PDF, Image) with colorful website-like UI
|
| 4 |
- All functionality preserved
|
| 5 |
- Dark theme, gradient buttons, visible text
|
| 6 |
+
- Chat bubbles for user/assistant messages
|
| 7 |
"""
|
| 8 |
import os
|
| 9 |
import uuid
|
|
|
|
| 65 |
top_idx=int(scores.argmax().item())
|
| 66 |
return chunks[top_idx]
|
| 67 |
|
| 68 |
+
def _append_chat_display(session_id, user_text, assistant_text):
    """Record one (user, assistant) exchange in the display log of a session.

    The per-session list inside ``CHAT_DISPLAY`` is created on first use,
    then the pair is appended to it as a 2-tuple.
    """
    turns = CHAT_DISPLAY.setdefault(session_id, [])
    turns.append((user_text, assistant_text))
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
+
def format_chat_messages(chat_display):
    """Turn stored chat turns into colored HTML bubbles for ``gr.Chatbot``.

    Args:
        chat_display: iterable of ``(user_msg, assistant_msg)`` pairs.

    Returns:
        A list with two ``(html_text, None)`` tuples per pair: first the
        user bubble, then the assistant bubble.

    Message text is HTML-escaped before it is placed inside the bubble
    markup, so characters such as ``<`` or ``&`` coming from a transcript
    or a model reply are shown literally instead of being injected as
    markup.
    """
    # Local import keeps this block self-contained; the file's import
    # section is not fully visible from here.
    from html import escape

    # NOTE(review): the markup is kept flush-left on purpose; indented HTML
    # may be treated as a code block by a Markdown renderer -- confirm
    # against the Chatbot rendering used by this app.
    bubble = (
        '\n<div style="background:{bg};color:{fg};padding:8px 12px;'
        'border-radius:12px;max-width:70%;margin:5px 0;'
        'align-self:{side};">\n{text}\n</div>\n'
    )

    formatted = []
    for user_msg, assistant_msg in chat_display:
        # str() mirrors what the f-string interpolation did for non-string
        # values (e.g. None) before escaping.
        user_html = bubble.format(
            bg="#38bdf8", fg="#0f172a", side="flex-end",
            text=escape(str(user_msg)),
        )
        assistant_html = bubble.format(
            bg="#facc15", fg="#1e293b", side="flex-start",
            text=escape(str(assistant_msg)),
        )
        formatted.append((user_html, None))
        formatted.append((assistant_html, None))
    return formatted
|
| 91 |
+
|
| 92 |
+
# ------------------ Transcription & LLM ------------------
|
| 93 |
def transcribe_audio(audio_path):
|
| 94 |
if not audio_path or not os.path.exists(audio_path):
|
| 95 |
return "Error: audio file missing."
|
|
|
|
| 202 |
if not summary: summary="No summary available."
|
| 203 |
return generate_pdf_file(summary,"summary")
|
| 204 |
|
| 205 |
+
# ------------------ Voice & Text Handlers ------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
def handle_voice_general(audio_file, session_id, tts_lang="en", enhancer_enabled=False, enhancer_tone="Helpful"):
    """Handle a spoken general question.

    Resolves the recording path, transcribes it, generates a reply with
    ``generate_response``, logs the turn and synthesizes the reply.

    Returns:
        ``(assistant_text, synthesize_speech result, formatted history)``,
        or ``("No audio", None, [])`` when no path could be resolved.
    """
    recording = _get_path_from_gr_file(audio_file)
    if not recording:
        return "No audio", None, []

    question = transcribe_audio(recording)
    reply = generate_response(session_id, question, enhancer_enabled, enhancer_tone)
    _append_chat_display(session_id, question, reply)

    spoken_reply = synthesize_speech(reply, tts_lang)
    history = format_chat_messages(CHAT_DISPLAY[session_id])
    return reply, spoken_reply, history
|
| 214 |
|
| 215 |
def handle_voice_pdf(audio_file, session_id, tts_lang="en"):
    """Handle a spoken question that is answered by ``handle_pdf_question``.

    Resolves the recording path, transcribes it, obtains the answer, logs
    the turn and synthesizes the answer.

    Returns:
        ``(assistant_text, synthesize_speech result, formatted history)``,
        or ``("No audio", None, [])`` when no path could be resolved.
    """
    recording = _get_path_from_gr_file(audio_file)
    if not recording:
        return "No audio", None, []

    question = transcribe_audio(recording)
    reply = handle_pdf_question(question, session_id)
    _append_chat_display(session_id, question, reply)

    spoken_reply = synthesize_speech(reply, tts_lang)
    history = format_chat_messages(CHAT_DISPLAY[session_id])
    return reply, spoken_reply, history
|
| 223 |
|
| 224 |
def handle_voice_image(audio_file, session_id, tts_lang="en"):
    """Handle a spoken question that is answered by ``handle_image_question``.

    Resolves the recording path, transcribes it, obtains the answer, logs
    the turn and synthesizes the answer.

    Returns:
        ``(assistant_text, synthesize_speech result, formatted history)``,
        or ``("No audio", None, [])`` when no path could be resolved.
    """
    recording = _get_path_from_gr_file(audio_file)
    if not recording:
        return "No audio", None, []

    question = transcribe_audio(recording)
    reply = handle_image_question(question, session_id)
    _append_chat_display(session_id, question, reply)

    spoken_reply = synthesize_speech(reply, tts_lang)
    history = format_chat_messages(CHAT_DISPLAY[session_id])
    return reply, spoken_reply, history
|
| 232 |
|
| 233 |
def handle_text_general(user_text, session_id, enhancer_enabled=False, enhancer_tone="Helpful"):
    """Handle a typed general question.

    Generates a reply with ``generate_response``, logs the turn, and
    returns ``(assistant_text, formatted history)``.
    """
    reply = generate_response(session_id, user_text, enhancer_enabled, enhancer_tone)
    _append_chat_display(session_id, user_text, reply)

    history = format_chat_messages(CHAT_DISPLAY[session_id])
    return reply, history
|
| 237 |
|
| 238 |
def handle_text_pdf(question, session_id):
    """Forward a typed question to ``handle_pdf_question`` and return its result."""
    answer = handle_pdf_question(question, session_id)
    return answer
|
| 239 |
def handle_text_image(question, session_id):
    """Forward a typed question to ``handle_image_question`` and return its result."""
    answer = handle_image_question(question, session_id)
    return answer
|
|
|
|
| 264 |
|
| 265 |
# --- Voice Tab ---
|
| 266 |
col_voice=gr.Column(visible=True)
|
| 267 |
+
chat_voice=gr.Chatbot(height=400)
|
| 268 |
with gr.Row():
|
| 269 |
mic=gr.Audio(type="filepath",label="🎤 Record Voice")
|
| 270 |
audio_output=gr.Audio(type="filepath",label="Assistant Voice",interactive=False)
|