asad9641 committed on
Commit
5c70b41
·
verified ·
1 Parent(s): c88db1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -70
app.py CHANGED
@@ -86,9 +86,8 @@ def select_relevant_chunk(question, chunks, chunk_embeds):
86
  def _chat_display_to_messages(chat_display):
87
  msgs = []
88
  for user, assistant in chat_display:
89
- if user and assistant: # only include valid strings
90
- msgs.append({"role": "user", "content": str(user)})
91
- msgs.append({"role": "assistant", "content": str(assistant)})
92
  return msgs
93
 
94
  # ------------------ Transcription & LLM ------------------
@@ -108,25 +107,9 @@ def transcribe_audio(audio_path):
108
  return f"Error transcribing audio: {e}"
109
 
110
  def groq_chat_completion(messages):
111
- # Ensure all messages are dicts with 'role' and 'content' as non-empty strings
112
- safe_msgs = []
113
- for m in messages:
114
- if isinstance(m, dict):
115
- role = m.get("role")
116
- content = m.get("content")
117
- if role in ("user", "assistant", "system") and isinstance(content, str) and content.strip():
118
- safe_msgs.append({"role": role, "content": content.strip()})
119
- if not safe_msgs:
120
- return "Error: No valid messages to send to API."
121
-
122
- body = {"model": "llama-3.1-8b-instant", "messages": safe_msgs}
123
  try:
124
- resp = requests.post(
125
- "https://api/groq.com/openai/v1/chat/completions",
126
- headers=HEADERS,
127
- json=body,
128
- timeout=60
129
- )
130
  resp.raise_for_status()
131
  return resp.json()["choices"][0]["message"]["content"]
132
  except Exception as e:
@@ -134,9 +117,6 @@ def groq_chat_completion(messages):
134
  return f"Error generating response: {e}"
135
 
136
  def generate_response(session_id, user_text, enhancer_enabled=False, enhancer_tone="Helpful"):
137
- if not isinstance(user_text, str) or not user_text.strip():
138
- user_text = "No input provided."
139
-
140
  if session_id not in SESSION_HISTORY:
141
  SESSION_HISTORY[session_id] = []
142
 
@@ -147,7 +127,7 @@ def generate_response(session_id, user_text, enhancer_enabled=False, enhancer_to
147
  messages.append({"role": "user", "content": f"Enhance response. Tone: {enhancer_tone}. Question: {user_text}"})
148
 
149
  assistant_text = groq_chat_completion(messages)
150
- SESSION_HISTORY[session_id].append({"role": "assistant", "content": assistant_text or "No response generated."})
151
  return assistant_text
152
 
153
  # ------------------ PDF handling ------------------
@@ -179,6 +159,7 @@ def handle_pdf_question(question, session_id):
179
  {"role": "user", "content": f"PDF chunk:\n{chunk}\n\nQuestion: {question}"}
180
  ]
181
  assistant_text = groq_chat_completion(messages)
 
182
  assistant_text = f"**Snippet from PDF:**\n{chunk[:200]}...\n\n**Answer:**\n{assistant_text}"
183
  if session_id not in SESSION_HISTORY:
184
  SESSION_HISTORY[session_id] = []
@@ -246,71 +227,52 @@ def generate_pdf_file(text, filename_prefix="summary"):
246
  pdf.output(file_path)
247
  return file_path
248
 
249
- # ------------------ Full summary download (SAFE) ------------------
250
- def download_full_summary(session_voice, session_pdf, session_image):
251
- combined = []
252
-
253
- for session, label in zip([session_voice, session_pdf, session_image], ["🎤 VOICE", "📄 PDF", "🖼 IMAGE"]):
254
- history = SESSION_HISTORY.get(session, [])
255
- for m in history:
256
- if isinstance(m, dict) and m.get("role") == "assistant" and isinstance(m.get("content"), str) and m["content"].strip():
257
- combined.append(f"{label}:\n{m['content']}")
258
-
259
- if not combined:
260
- combined.append("No summary available.")
261
-
262
- summary_text = "\n\n".join(combined)
263
- return generate_pdf_file(summary_text, "full_summary")
264
 
265
  # ------------------ Voice & Chat Handlers ------------------
266
  def _append_chat_display(session_id, user_text, assistant_text):
267
  if session_id not in CHAT_DISPLAY:
268
  CHAT_DISPLAY[session_id] = []
269
- CHAT_DISPLAY[session_id].append((str(user_text or ""), str(assistant_text or "")))
270
 
271
  def handle_voice_general(audio_file, session_id, tts_lang="en", enhancer_enabled=False, enhancer_tone="Helpful"):
272
  path = _get_path_from_gr_file(audio_file)
273
  if not path:
274
  return "No audio provided.", None, []
275
-
276
  user_text = transcribe_audio(path)
277
  assistant_text = generate_response(session_id, user_text, enhancer_enabled, enhancer_tone)
278
-
279
  _append_chat_display(session_id, user_text, assistant_text)
280
  audio_path = synthesize_speech(assistant_text, lang=tts_lang)
281
-
282
- return assistant_text, audio_path, CHAT_DISPLAY[session_id]
283
 
284
  def handle_voice_pdf(audio_file, session_id, tts_lang="en"):
285
  path = _get_path_from_gr_file(audio_file)
286
  if not path:
287
  return "No audio provided.", None, []
288
-
289
  user_text = transcribe_audio(path)
290
  assistant_text = handle_pdf_question(user_text, session_id)
291
-
292
  _append_chat_display(session_id, user_text, assistant_text)
293
  audio_path = synthesize_speech(assistant_text, lang=tts_lang)
294
-
295
- return assistant_text, audio_path, CHAT_DISPLAY[session_id]
296
 
297
  def handle_voice_image(audio_file, session_id, tts_lang="en"):
298
  path = _get_path_from_gr_file(audio_file)
299
  if not path:
300
  return "No audio provided.", None, []
301
-
302
  user_text = transcribe_audio(path)
303
  assistant_text = handle_image_question(user_text, session_id)
304
-
305
  _append_chat_display(session_id, user_text, assistant_text)
306
  audio_path = synthesize_speech(assistant_text, lang=tts_lang)
307
-
308
- return assistant_text, audio_path, CHAT_DISPLAY[session_id]
309
 
310
  def handle_text_general(user_text, session_id, enhancer_enabled=False, enhancer_tone="Helpful"):
311
  assistant = generate_response(session_id, user_text, enhancer_enabled, enhancer_tone)
312
  _append_chat_display(session_id, user_text, assistant)
313
- return assistant, CHAT_DISPLAY[session_id]
314
 
315
  def handle_text_pdf(question, session_id):
316
  return handle_pdf_question(question, session_id)
@@ -322,9 +284,10 @@ def handle_text_image(question, session_id):
322
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
323
  gr.HTML("""
324
  <style>
 
325
  #mic_box audio {
326
- height: 50px !important;
327
- width: 200px !important;
328
  }
329
  </style>
330
  """)
@@ -333,11 +296,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
333
  session_voice = gr.State(str(uuid.uuid4()))
334
  session_pdf = gr.State(str(uuid.uuid4()))
335
  session_image = gr.State(str(uuid.uuid4()))
 
 
336
 
337
  with gr.Tab("🎤 Voice Chat"):
338
  chat_voice = gr.Chatbot(type="messages", height=300)
339
  with gr.Row():
340
- mic = gr.Audio(type="filepath", label="🎤 Record Voice (hold & speak)", show_download_button=True, elem_id="mic_box")
341
  audio_output = gr.Audio(label="Assistant Voice Output", type="filepath", interactive=False)
342
  tts_lang = gr.Dropdown(choices=["en", "ur"], value="en", label="TTS Language")
343
 
@@ -345,12 +310,19 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
345
  btn_general = gr.Button("⚡Ask General 🎯")
346
  btn_pdf = gr.Button("⚡Ask PDF 📄")
347
  btn_image = gr.Button("⚡Ask Image 🖼")
348
- enhancer_toggle = gr.Checkbox(label="Enable Response Enhancer", value=False, scale=1)
349
- tone_dropdown = gr.Dropdown(choices=["Helpful", "Formal", "Friendly"], value="Helpful", label="Enhancer Tone", scale=1)
350
  with gr.Row():
351
  btn_reset_logs = gr.Button("♻ Reset LOGs")
352
  btn_download_logs = gr.Button("📥 Download Summary")
353
- Voice_summary_file = gr.File(label="📥Download Summary File", interactive=False, scale=1)
 
 
 
 
 
 
 
354
  answer_voice = gr.Textbox(label="Assistant Answer (text)", lines=2, visible=False)
355
 
356
  btn_general.click(fn=handle_voice_general,
@@ -358,14 +330,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
358
  outputs=[answer_voice, audio_output, chat_voice])
359
  btn_pdf.click(fn=handle_voice_pdf, inputs=[mic, session_pdf, tts_lang], outputs=[answer_voice, audio_output, chat_voice])
360
  btn_image.click(fn=handle_voice_image, inputs=[mic, session_image, tts_lang], outputs=[answer_voice, audio_output, chat_voice])
 
361
  btn_reset_logs.click(lambda: (str(uuid.uuid4()), [], None, None, ""), outputs=[session_voice, chat_voice, mic, audio_output, answer_voice])
362
-
363
- # FIXED: Full summary download safely
364
- btn_download_logs.click(
365
- download_full_summary,
366
- inputs=[session_voice, session_pdf, session_image],
367
- outputs=[Voice_summary_file]
368
- )
369
  with gr.Tab("📄 PDF Summarizer"):
370
  pdf_output = gr.Textbox(label="Answer (Text Only)", lines=5)
371
  with gr.Row():
@@ -383,7 +351,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
383
  pdf_upload_btn.upload(handle_pdf_upload, inputs=[pdf_upload_btn, session_pdf], outputs=[pdf_upload_msg])
384
  pdf_send_btn.click(handle_text_pdf, inputs=[pdf_question, session_pdf], outputs=[pdf_output])
385
  pdf_reset_btn.click(lambda: (str(uuid.uuid4()), ""), outputs=[session_pdf, pdf_output])
386
- pdf_download_btn.click( download_full_summary, inputs=[session_voice, session_pdf, session_image], outputs=[pdf_summary_file])
387
 
388
  with gr.Tab("🖼 Image OCR"):
389
  image_output = gr.Textbox(label="Answer (Text Only)", lines=5)
@@ -402,7 +370,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
402
  image_upload_btn.upload(handle_image_upload, inputs=[image_upload_btn, session_image], outputs=[image_upload_msg, image_output])
403
  image_send_btn.click(handle_text_image, inputs=[image_question, session_image], outputs=[image_output])
404
  image_reset_btn.click(lambda: (str(uuid.uuid4()), ""), outputs=[session_image, image_output])
405
- image_download_btn.click( download_full_summary, inputs=[session_voice, session_pdf, session_image], outputs=[image_summary_file])
406
 
407
  if __name__ == "__main__":
408
- demo.launch()
 
86
  def _chat_display_to_messages(chat_display):
87
  msgs = []
88
  for user, assistant in chat_display:
89
+ msgs.append({"role": "user", "content": user})
90
+ msgs.append({"role": "assistant", "content": assistant})
 
91
  return msgs
92
 
93
  # ------------------ Transcription & LLM ------------------
 
107
  return f"Error transcribing audio: {e}"
108
 
109
  def groq_chat_completion(messages):
110
+ body = {"model": "llama-3.1-8b-instant", "messages": messages}
 
 
 
 
 
 
 
 
 
 
 
111
  try:
112
+ resp = requests.post("https://api.groq.com/openai/v1/chat/completions", headers=HEADERS, json=body, timeout=60)
 
 
 
 
 
113
  resp.raise_for_status()
114
  return resp.json()["choices"][0]["message"]["content"]
115
  except Exception as e:
 
117
  return f"Error generating response: {e}"
118
 
119
  def generate_response(session_id, user_text, enhancer_enabled=False, enhancer_tone="Helpful"):
 
 
 
120
  if session_id not in SESSION_HISTORY:
121
  SESSION_HISTORY[session_id] = []
122
 
 
127
  messages.append({"role": "user", "content": f"Enhance response. Tone: {enhancer_tone}. Question: {user_text}"})
128
 
129
  assistant_text = groq_chat_completion(messages)
130
+ SESSION_HISTORY[session_id].append({"role": "assistant", "content": assistant_text})
131
  return assistant_text
132
 
133
  # ------------------ PDF handling ------------------
 
159
  {"role": "user", "content": f"PDF chunk:\n{chunk}\n\nQuestion: {question}"}
160
  ]
161
  assistant_text = groq_chat_completion(messages)
162
+ # Add snippet highlighting for wow factor
163
  assistant_text = f"**Snippet from PDF:**\n{chunk[:200]}...\n\n**Answer:**\n{assistant_text}"
164
  if session_id not in SESSION_HISTORY:
165
  SESSION_HISTORY[session_id] = []
 
227
  pdf.output(file_path)
228
  return file_path
229
 
230
+ def download_pdf_summary(session_id):
231
+ summary_text = "\n".join([m["content"] for m in SESSION_HISTORY.get(session_id, []) if m["role"]=="assistant"])
232
+ if not summary_text:
233
+ summary_text = "No summary available."
234
+ return generate_pdf_file(summary_text, "summary")
 
 
 
 
 
 
 
 
 
 
235
 
236
  # ------------------ Voice & Chat Handlers ------------------
237
  def _append_chat_display(session_id, user_text, assistant_text):
238
  if session_id not in CHAT_DISPLAY:
239
  CHAT_DISPLAY[session_id] = []
240
+ CHAT_DISPLAY[session_id].append((user_text, assistant_text))
241
 
242
  def handle_voice_general(audio_file, session_id, tts_lang="en", enhancer_enabled=False, enhancer_tone="Helpful"):
243
  path = _get_path_from_gr_file(audio_file)
244
  if not path:
245
  return "No audio provided.", None, []
 
246
  user_text = transcribe_audio(path)
247
  assistant_text = generate_response(session_id, user_text, enhancer_enabled, enhancer_tone)
 
248
  _append_chat_display(session_id, user_text, assistant_text)
249
  audio_path = synthesize_speech(assistant_text, lang=tts_lang)
250
+ return assistant_text, audio_path, _chat_display_to_messages(CHAT_DISPLAY[session_id])
 
251
 
252
  def handle_voice_pdf(audio_file, session_id, tts_lang="en"):
253
  path = _get_path_from_gr_file(audio_file)
254
  if not path:
255
  return "No audio provided.", None, []
 
256
  user_text = transcribe_audio(path)
257
  assistant_text = handle_pdf_question(user_text, session_id)
 
258
  _append_chat_display(session_id, user_text, assistant_text)
259
  audio_path = synthesize_speech(assistant_text, lang=tts_lang)
260
+ return assistant_text, audio_path, _chat_display_to_messages(CHAT_DISPLAY[session_id])
 
261
 
262
  def handle_voice_image(audio_file, session_id, tts_lang="en"):
263
  path = _get_path_from_gr_file(audio_file)
264
  if not path:
265
  return "No audio provided.", None, []
 
266
  user_text = transcribe_audio(path)
267
  assistant_text = handle_image_question(user_text, session_id)
 
268
  _append_chat_display(session_id, user_text, assistant_text)
269
  audio_path = synthesize_speech(assistant_text, lang=tts_lang)
270
+ return assistant_text, audio_path, _chat_display_to_messages(CHAT_DISPLAY[session_id])
 
271
 
272
  def handle_text_general(user_text, session_id, enhancer_enabled=False, enhancer_tone="Helpful"):
273
  assistant = generate_response(session_id, user_text, enhancer_enabled, enhancer_tone)
274
  _append_chat_display(session_id, user_text, assistant)
275
+ return assistant, _chat_display_to_messages(CHAT_DISPLAY[session_id])
276
 
277
  def handle_text_pdf(question, session_id):
278
  return handle_pdf_question(question, session_id)
 
284
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
285
  gr.HTML("""
286
  <style>
287
+ /* Change height + width of the audio recorder box */
288
  #mic_box audio {
289
+ height: 50px !important; /* adjust height */
290
+ width: 200px !important; /* adjust width (optional) */
291
  }
292
  </style>
293
  """)
 
296
  session_voice = gr.State(str(uuid.uuid4()))
297
  session_pdf = gr.State(str(uuid.uuid4()))
298
  session_image = gr.State(str(uuid.uuid4()))
299
+ # FIX: define pdf_summary_file BEFORE it is used
300
+ #pdf_summary_file = gr.File(label="Download Summary", visible=False)
301
 
302
  with gr.Tab("🎤 Voice Chat"):
303
  chat_voice = gr.Chatbot(type="messages", height=300)
304
  with gr.Row():
305
+ mic = gr.Audio(type="filepath",label="🎤 Record Voice (hold & speak)", show_download_button=True, elem_id="mic_box")
306
  audio_output = gr.Audio(label="Assistant Voice Output", type="filepath", interactive=False)
307
  tts_lang = gr.Dropdown(choices=["en", "ur"], value="en", label="TTS Language")
308
 
 
310
  btn_general = gr.Button("⚡Ask General 🎯")
311
  btn_pdf = gr.Button("⚡Ask PDF 📄")
312
  btn_image = gr.Button("⚡Ask Image 🖼")
313
+ enhancer_toggle = gr.Checkbox(label="Enable Response Enhancer", value=False, scale =1)
314
+ tone_dropdown = gr.Dropdown(choices=["Helpful", "Formal", "Friendly"], value="Helpful", label="Enhancer Tone", scale =1)
315
  with gr.Row():
316
  btn_reset_logs = gr.Button("♻ Reset LOGs")
317
  btn_download_logs = gr.Button("📥 Download Summary")
318
+ Voice_summary_file = gr.File(label="📥Download Summary File", interactive=False,scale =1)
319
+ #btn_general = gr.Button("⚡Ask General 🎯")
320
+ #btn_pdf = gr.Button("⚡Ask PDF 📄")
321
+ #btn_image = gr.Button("⚡Ask Image 🖼")
322
+ #with gr.Row():
323
+ #text_input = gr.Textbox(label="Or type a question (General)",visible=False)
324
+ #btn_send_text = gr.Button("Send (Text General)",visible=False)
325
+ #btn_reset_logs = gr.Button("♻ Reset LOGs")
326
  answer_voice = gr.Textbox(label="Assistant Answer (text)", lines=2, visible=False)
327
 
328
  btn_general.click(fn=handle_voice_general,
 
330
  outputs=[answer_voice, audio_output, chat_voice])
331
  btn_pdf.click(fn=handle_voice_pdf, inputs=[mic, session_pdf, tts_lang], outputs=[answer_voice, audio_output, chat_voice])
332
  btn_image.click(fn=handle_voice_image, inputs=[mic, session_image, tts_lang], outputs=[answer_voice, audio_output, chat_voice])
333
+ # btn_send_text.click(fn=handle_text_general, inputs=[text_input, session_voice, enhancer_toggle, tone_dropdown], outputs=[answer_voice, chat_voice])
334
  btn_reset_logs.click(lambda: (str(uuid.uuid4()), [], None, None, ""), outputs=[session_voice, chat_voice, mic, audio_output, answer_voice])
335
+ btn_download_logs.click(download_pdf_summary, inputs=[session_voice], outputs=[Voice_summary_file])
336
+
 
 
 
 
 
337
  with gr.Tab("📄 PDF Summarizer"):
338
  pdf_output = gr.Textbox(label="Answer (Text Only)", lines=5)
339
  with gr.Row():
 
351
  pdf_upload_btn.upload(handle_pdf_upload, inputs=[pdf_upload_btn, session_pdf], outputs=[pdf_upload_msg])
352
  pdf_send_btn.click(handle_text_pdf, inputs=[pdf_question, session_pdf], outputs=[pdf_output])
353
  pdf_reset_btn.click(lambda: (str(uuid.uuid4()), ""), outputs=[session_pdf, pdf_output])
354
+ pdf_download_btn.click(download_pdf_summary, inputs=[session_pdf], outputs=[pdf_summary_file])
355
 
356
  with gr.Tab("🖼 Image OCR"):
357
  image_output = gr.Textbox(label="Answer (Text Only)", lines=5)
 
370
  image_upload_btn.upload(handle_image_upload, inputs=[image_upload_btn, session_image], outputs=[image_upload_msg, image_output])
371
  image_send_btn.click(handle_text_image, inputs=[image_question, session_image], outputs=[image_output])
372
  image_reset_btn.click(lambda: (str(uuid.uuid4()), ""), outputs=[session_image, image_output])
373
+ image_download_btn.click(download_pdf_summary, inputs=[session_image], outputs=[image_summary_file])
374
 
375
  if __name__ == "__main__":
376
+ demo.launch()