asad9641 committed on
Commit
9a0eaa2
·
verified ·
1 Parent(s): a54913f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -21
app.py CHANGED
@@ -109,7 +109,7 @@ def transcribe_audio(audio_path):
109
  def groq_chat_completion(messages):
110
  body = {"model": "llama-3.1-8b-instant", "messages": messages}
111
  try:
112
- resp = requests.post("https://api/groq.com/openai/v1/chat/completions", headers=HEADERS, json=body, timeout=60)
113
  resp.raise_for_status()
114
  return resp.json()["choices"][0]["message"]["content"]
115
  except Exception as e:
@@ -159,6 +159,7 @@ def handle_pdf_question(question, session_id):
159
  {"role": "user", "content": f"PDF chunk:\n{chunk}\n\nQuestion: {question}"}
160
  ]
161
  assistant_text = groq_chat_completion(messages)
 
162
  assistant_text = f"**Snippet from PDF:**\n{chunk[:200]}...\n\n**Answer:**\n{assistant_text}"
163
  if session_id not in SESSION_HISTORY:
164
  SESSION_HISTORY[session_id] = []
@@ -283,13 +284,11 @@ def handle_text_image(question, session_id):
283
  with gr.Blocks() as demo:
284
  gr.HTML("""
285
  <style>
286
- /* Audio recorder styling */
287
- #mic_box audio { height: 50px !important; width: 200px !important; }
288
- /* Chat bubbles */
289
- .chatbot .user { background-color: #D1E8FF; color: #000; border-radius: 12px; padding:5px 10px; }
290
- .chatbot .assistant { background-color: #FFE4B5; color: #000; border-radius: 12px; padding:5px 10px; }
291
- /* Tabs styling */
292
- .tabbutton { background: linear-gradient(90deg, #f6d365, #fda085); color: #fff !important; font-weight: bold; }
293
  </style>
294
  """)
295
  gr.Markdown("## 🛠 Multi-Mode AI Assistant (Voice, PDF, Image)")
@@ -297,23 +296,33 @@ with gr.Blocks() as demo:
297
  session_voice = gr.State(str(uuid.uuid4()))
298
  session_pdf = gr.State(str(uuid.uuid4()))
299
  session_image = gr.State(str(uuid.uuid4()))
 
 
300
 
301
  with gr.Tab("🎤 Voice Chat"):
302
- chat_voice = gr.Chatbot(height=350)
303
  with gr.Row():
304
- mic = gr.Audio(type="filepath", label="🎤 Record Voice (hold & speak)", elem_id="mic_box")
305
  audio_output = gr.Audio(label="Assistant Voice Output", type="filepath", interactive=False)
306
  tts_lang = gr.Dropdown(choices=["en", "ur"], value="en", label="TTS Language")
 
307
  with gr.Row():
308
  btn_general = gr.Button("⚡Ask General 🎯")
309
  btn_pdf = gr.Button("⚡Ask PDF 📄")
310
  btn_image = gr.Button("⚡Ask Image 🖼")
311
- enhancer_toggle = gr.Checkbox(label="Enable Response Enhancer", value=False)
312
- tone_dropdown = gr.Dropdown(choices=["Helpful","Formal","Friendly"], value="Helpful", label="Enhancer Tone")
313
  with gr.Row():
314
  btn_reset_logs = gr.Button("♻ Reset LOGs")
315
  btn_download_logs = gr.Button("📥 Download Summary")
316
- Voice_summary_file = gr.File(label="📥Download Summary File", interactive=False)
 
 
 
 
 
 
 
317
  answer_voice = gr.Textbox(label="Assistant Answer (text)", lines=2, visible=False)
318
 
319
  btn_general.click(fn=handle_voice_general,
@@ -321,44 +330,47 @@ with gr.Blocks() as demo:
321
  outputs=[answer_voice, audio_output, chat_voice])
322
  btn_pdf.click(fn=handle_voice_pdf, inputs=[mic, session_pdf, tts_lang], outputs=[answer_voice, audio_output, chat_voice])
323
  btn_image.click(fn=handle_voice_image, inputs=[mic, session_image, tts_lang], outputs=[answer_voice, audio_output, chat_voice])
 
324
  btn_reset_logs.click(lambda: (str(uuid.uuid4()), [], None, None, ""), outputs=[session_voice, chat_voice, mic, audio_output, answer_voice])
325
  btn_download_logs.click(download_pdf_summary, inputs=[session_voice], outputs=[Voice_summary_file])
326
-
327
- # PDF Tab
328
  with gr.Tab("📄 PDF Summarizer"):
329
  pdf_output = gr.Textbox(label="Answer (Text Only)", lines=5)
330
  with gr.Row():
331
- pdf_upload_btn = gr.File(label="Upload PDF", file_types=[".pdf"])
332
  pdf_question = gr.Textbox(label="Ask a question about PDF (text)", lines=3)
333
  pdf_upload_msg = gr.Textbox(label="Upload Status", interactive=False)
 
334
  with gr.Row():
335
  pdf_send_btn = gr.Button("Ask (Questions)")
336
  pdf_reset_btn = gr.Button("♻ Reset LOGs")
337
  with gr.Row():
338
- pdf_summary_file = gr.File(label="📥Download Summary File", interactive=False)
339
  pdf_download_btn = gr.Button("📥 Download Summary")
 
340
  pdf_upload_btn.upload(handle_pdf_upload, inputs=[pdf_upload_btn, session_pdf], outputs=[pdf_upload_msg])
341
  pdf_send_btn.click(handle_text_pdf, inputs=[pdf_question, session_pdf], outputs=[pdf_output])
342
  pdf_reset_btn.click(lambda: (str(uuid.uuid4()), ""), outputs=[session_pdf, pdf_output])
343
  pdf_download_btn.click(download_pdf_summary, inputs=[session_pdf], outputs=[pdf_summary_file])
344
 
345
- # Image Tab
346
  with gr.Tab("🖼 Image OCR"):
347
  image_output = gr.Textbox(label="Answer (Text Only)", lines=5)
348
  with gr.Row():
349
- image_upload_btn = gr.File(label="Upload Image", file_types=[".png", ".jpg", ".jpeg"])
350
  image_question = gr.Textbox(label="Ask question about Image", lines=3)
351
  image_upload_msg = gr.Textbox(label="Upload Status", interactive=False)
 
352
  with gr.Row():
353
  image_send_btn = gr.Button("Ask (Questions)")
354
  image_reset_btn = gr.Button("♻ Reset LOGs")
355
  with gr.Row():
356
- image_summary_file = gr.File(label="📥Download Summary File", interactive=False)
357
  image_download_btn = gr.Button("📥 Download Summary")
 
358
  image_upload_btn.upload(handle_image_upload, inputs=[image_upload_btn, session_image], outputs=[image_upload_msg, image_output])
359
  image_send_btn.click(handle_text_image, inputs=[image_question, session_image], outputs=[image_output])
360
  image_reset_btn.click(lambda: (str(uuid.uuid4()), ""), outputs=[session_image, image_output])
361
  image_download_btn.click(download_pdf_summary, inputs=[session_image], outputs=[image_summary_file])
362
 
363
  if __name__ == "__main__":
364
- demo.launch()
 
109
  def groq_chat_completion(messages):
110
  body = {"model": "llama-3.1-8b-instant", "messages": messages}
111
  try:
112
+ resp = requests.post("https://api.groq.com/openai/v1/chat/completions", headers=HEADERS, json=body, timeout=60)
113
  resp.raise_for_status()
114
  return resp.json()["choices"][0]["message"]["content"]
115
  except Exception as e:
 
159
  {"role": "user", "content": f"PDF chunk:\n{chunk}\n\nQuestion: {question}"}
160
  ]
161
  assistant_text = groq_chat_completion(messages)
162
+ # Add snippet highlighting for wow factor
163
  assistant_text = f"**Snippet from PDF:**\n{chunk[:200]}...\n\n**Answer:**\n{assistant_text}"
164
  if session_id not in SESSION_HISTORY:
165
  SESSION_HISTORY[session_id] = []
 
284
  with gr.Blocks() as demo:
285
  gr.HTML("""
286
  <style>
287
+ /* Change height + width of the audio recorder box */
288
+ #mic_box audio {
289
+ height: 50px !important; /* adjust height */
290
+ width: 200px !important; /* adjust width (optional) */
291
+ }
 
 
292
  </style>
293
  """)
294
  gr.Markdown("## 🛠 Multi-Mode AI Assistant (Voice, PDF, Image)")
 
296
  session_voice = gr.State(str(uuid.uuid4()))
297
  session_pdf = gr.State(str(uuid.uuid4()))
298
  session_image = gr.State(str(uuid.uuid4()))
299
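+ # NOTE: pdf_summary_file must exist before pdf_download_btn.click references it; it is created inside the PDF tab, so the early definition below is left disabled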
300
+ #pdf_summary_file = gr.File(label="Download Summary", visible=False)
301
 
302
  with gr.Tab("🎤 Voice Chat"):
303
+ chat_voice = gr.Chatbot( height=320)
304
  with gr.Row():
305
+ mic = gr.Audio(type="filepath",label="🎤 Record Voice (hold & speak)", elem_id="mic_box")
306
  audio_output = gr.Audio(label="Assistant Voice Output", type="filepath", interactive=False)
307
  tts_lang = gr.Dropdown(choices=["en", "ur"], value="en", label="TTS Language")
308
+
309
  with gr.Row():
310
  btn_general = gr.Button("⚡Ask General 🎯")
311
  btn_pdf = gr.Button("⚡Ask PDF 📄")
312
  btn_image = gr.Button("⚡Ask Image 🖼")
313
+ enhancer_toggle = gr.Checkbox(label="Enable Response Enhancer", value=False, scale =1)
314
+ tone_dropdown = gr.Dropdown(choices=["Helpful", "Formal", "Friendly"], value="Helpful", label="Enhancer Tone", scale =1)
315
  with gr.Row():
316
  btn_reset_logs = gr.Button("♻ Reset LOGs")
317
  btn_download_logs = gr.Button("📥 Download Summary")
318
+ Voice_summary_file = gr.File(label="📥Download Summary File", interactive=False,scale =1)
319
+ #btn_general = gr.Button("⚡Ask General 🎯")
320
+ #btn_pdf = gr.Button("⚡Ask PDF 📄")
321
+ #btn_image = gr.Button("⚡Ask Image 🖼")
322
+ #with gr.Row():
323
+ #text_input = gr.Textbox(label="Or type a question (General)",visible=False)
324
+ #btn_send_text = gr.Button("Send (Text General)",visible=False)
325
+ #btn_reset_logs = gr.Button("♻ Reset LOGs")
326
  answer_voice = gr.Textbox(label="Assistant Answer (text)", lines=2, visible=False)
327
 
328
  btn_general.click(fn=handle_voice_general,
 
330
  outputs=[answer_voice, audio_output, chat_voice])
331
  btn_pdf.click(fn=handle_voice_pdf, inputs=[mic, session_pdf, tts_lang], outputs=[answer_voice, audio_output, chat_voice])
332
  btn_image.click(fn=handle_voice_image, inputs=[mic, session_image, tts_lang], outputs=[answer_voice, audio_output, chat_voice])
333
+ # btn_send_text.click(fn=handle_text_general, inputs=[text_input, session_voice, enhancer_toggle, tone_dropdown], outputs=[answer_voice, chat_voice])
334
  btn_reset_logs.click(lambda: (str(uuid.uuid4()), [], None, None, ""), outputs=[session_voice, chat_voice, mic, audio_output, answer_voice])
335
  btn_download_logs.click(download_pdf_summary, inputs=[session_voice], outputs=[Voice_summary_file])
336
+
 
337
  with gr.Tab("📄 PDF Summarizer"):
338
  pdf_output = gr.Textbox(label="Answer (Text Only)", lines=5)
339
  with gr.Row():
340
+ pdf_upload_btn = gr.File(label="Upload PDF", file_types=[".pdf"], scale=1 )
341
  pdf_question = gr.Textbox(label="Ask a question about PDF (text)", lines=3)
342
  pdf_upload_msg = gr.Textbox(label="Upload Status", interactive=False)
343
+
344
  with gr.Row():
345
  pdf_send_btn = gr.Button("Ask (Questions)")
346
  pdf_reset_btn = gr.Button("♻ Reset LOGs")
347
  with gr.Row():
348
+ pdf_summary_file = gr.File(label="📥Download Summary File", interactive=False,scale =1)
349
  pdf_download_btn = gr.Button("📥 Download Summary")
350
+
351
  pdf_upload_btn.upload(handle_pdf_upload, inputs=[pdf_upload_btn, session_pdf], outputs=[pdf_upload_msg])
352
  pdf_send_btn.click(handle_text_pdf, inputs=[pdf_question, session_pdf], outputs=[pdf_output])
353
  pdf_reset_btn.click(lambda: (str(uuid.uuid4()), ""), outputs=[session_pdf, pdf_output])
354
  pdf_download_btn.click(download_pdf_summary, inputs=[session_pdf], outputs=[pdf_summary_file])
355
 
 
356
  with gr.Tab("🖼 Image OCR"):
357
  image_output = gr.Textbox(label="Answer (Text Only)", lines=5)
358
  with gr.Row():
359
+ image_upload_btn = gr.File(label="Upload Image", file_types=[".png", ".jpg", ".jpeg"], scale =1 )
360
  image_question = gr.Textbox(label="Ask question about Image", lines=3)
361
  image_upload_msg = gr.Textbox(label="Upload Status", interactive=False)
362
+
363
  with gr.Row():
364
  image_send_btn = gr.Button("Ask (Questions)")
365
  image_reset_btn = gr.Button("♻ Reset LOGs")
366
  with gr.Row():
367
+ image_summary_file = gr.File(label="📥Download Summary File", interactive=False,scale =1)
368
  image_download_btn = gr.Button("📥 Download Summary")
369
+
370
  image_upload_btn.upload(handle_image_upload, inputs=[image_upload_btn, session_image], outputs=[image_upload_msg, image_output])
371
  image_send_btn.click(handle_text_image, inputs=[image_question, session_image], outputs=[image_output])
372
  image_reset_btn.click(lambda: (str(uuid.uuid4()), ""), outputs=[session_image, image_output])
373
  image_download_btn.click(download_pdf_summary, inputs=[session_image], outputs=[image_summary_file])
374
 
375
  if __name__ == "__main__":
376
+ demo.launch()