Spaces:

asad9641
/

OmniSense-AI-Bot

Sleeping

App Files Files Community

asad9641 committed on Nov 22, 2025

Commit

9a0eaa2

verified ·

1 Parent(s): a54913f

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -21

app.py CHANGED Viewed

@@ -109,7 +109,7 @@ def transcribe_audio(audio_path):
 def groq_chat_completion(messages):
     body = {"model": "llama-3.1-8b-instant", "messages": messages}
     try:
-        resp = requests.post("https://api/groq.com/openai/v1/chat/completions", headers=HEADERS, json=body, timeout=60)
         resp.raise_for_status()
         return resp.json()["choices"][0]["message"]["content"]
     except Exception as e:
@@ -159,6 +159,7 @@ def handle_pdf_question(question, session_id):
         {"role": "user", "content": f"PDF chunk:\n{chunk}\n\nQuestion: {question}"}
     ]
     assistant_text = groq_chat_completion(messages)
     assistant_text = f"**Snippet from PDF:**\n{chunk[:200]}...\n\n**Answer:**\n{assistant_text}"
     if session_id not in SESSION_HISTORY:
         SESSION_HISTORY[session_id] = []
@@ -283,13 +284,11 @@ def handle_text_image(question, session_id):
 with gr.Blocks() as demo:
     gr.HTML("""
     <style>
-        /* Audio recorder styling */
-        #mic_box audio { height: 50px !important; width: 200px !important; }
-        /* Chat bubbles */
-        .chatbot .user { background-color: #D1E8FF; color: #000; border-radius: 12px; padding:5px 10px; }
-        .chatbot .assistant { background-color: #FFE4B5; color: #000; border-radius: 12px; padding:5px 10px; }
-        /* Tabs styling */
-        .tabbutton { background: linear-gradient(90deg, #f6d365, #fda085); color: #fff !important; font-weight: bold; }
     </style>
     """)
     gr.Markdown("## 🛠 Multi-Mode AI Assistant (Voice, PDF, Image)")
@@ -297,23 +296,33 @@ with gr.Blocks() as demo:
     session_voice = gr.State(str(uuid.uuid4()))
     session_pdf = gr.State(str(uuid.uuid4()))
     session_image = gr.State(str(uuid.uuid4()))
     with gr.Tab("🎤 Voice Chat"):
-        chat_voice = gr.Chatbot(height=350)
         with gr.Row():
-            mic = gr.Audio(type="filepath", label="🎤 Record Voice (hold & speak)", elem_id="mic_box")
             audio_output = gr.Audio(label="Assistant Voice Output", type="filepath", interactive=False)
             tts_lang = gr.Dropdown(choices=["en", "ur"], value="en", label="TTS Language")
         with gr.Row():
             btn_general = gr.Button("⚡Ask General 🎯")
             btn_pdf = gr.Button("⚡Ask PDF 📄")
             btn_image = gr.Button("⚡Ask Image 🖼")
-            enhancer_toggle = gr.Checkbox(label="Enable Response Enhancer", value=False)
-            tone_dropdown = gr.Dropdown(choices=["Helpful","Formal","Friendly"], value="Helpful", label="Enhancer Tone")
         with gr.Row():
             btn_reset_logs = gr.Button("♻ Reset LOGs")
             btn_download_logs = gr.Button("📥 Download Summary")
-            Voice_summary_file = gr.File(label="📥Download Summary File", interactive=False)
         answer_voice = gr.Textbox(label="Assistant Answer (text)", lines=2, visible=False)
         btn_general.click(fn=handle_voice_general,
@@ -321,44 +330,47 @@ with gr.Blocks() as demo:
                           outputs=[answer_voice, audio_output, chat_voice])
         btn_pdf.click(fn=handle_voice_pdf, inputs=[mic, session_pdf, tts_lang], outputs=[answer_voice, audio_output, chat_voice])
         btn_image.click(fn=handle_voice_image, inputs=[mic, session_image, tts_lang], outputs=[answer_voice, audio_output, chat_voice])
         btn_reset_logs.click(lambda: (str(uuid.uuid4()), [], None, None, ""), outputs=[session_voice, chat_voice, mic, audio_output, answer_voice])
         btn_download_logs.click(download_pdf_summary, inputs=[session_voice], outputs=[Voice_summary_file])
-    # PDF Tab
     with gr.Tab("📄 PDF Summarizer"):
         pdf_output = gr.Textbox(label="Answer (Text Only)", lines=5)
         with gr.Row():
-            pdf_upload_btn = gr.File(label="Upload PDF", file_types=[".pdf"])
             pdf_question = gr.Textbox(label="Ask a question about PDF (text)", lines=3)
             pdf_upload_msg = gr.Textbox(label="Upload Status", interactive=False)
         with gr.Row():
             pdf_send_btn = gr.Button("Ask (Questions)")
             pdf_reset_btn = gr.Button("♻ Reset LOGs")
         with gr.Row():
-            pdf_summary_file = gr.File(label="📥Download Summary File", interactive=False)
             pdf_download_btn = gr.Button("📥 Download Summary")
         pdf_upload_btn.upload(handle_pdf_upload, inputs=[pdf_upload_btn, session_pdf], outputs=[pdf_upload_msg])
         pdf_send_btn.click(handle_text_pdf, inputs=[pdf_question, session_pdf], outputs=[pdf_output])
         pdf_reset_btn.click(lambda: (str(uuid.uuid4()), ""), outputs=[session_pdf, pdf_output])
         pdf_download_btn.click(download_pdf_summary, inputs=[session_pdf], outputs=[pdf_summary_file])
-    # Image Tab
     with gr.Tab("🖼 Image OCR"):
         image_output = gr.Textbox(label="Answer (Text Only)", lines=5)
         with gr.Row():
-            image_upload_btn = gr.File(label="Upload Image", file_types=[".png", ".jpg", ".jpeg"])
             image_question = gr.Textbox(label="Ask question about Image", lines=3)
             image_upload_msg = gr.Textbox(label="Upload Status", interactive=False)
         with gr.Row():
             image_send_btn = gr.Button("Ask (Questions)")
             image_reset_btn = gr.Button("♻ Reset LOGs")
         with gr.Row():
-            image_summary_file = gr.File(label="📥Download Summary File", interactive=False)
             image_download_btn = gr.Button("📥 Download Summary")
         image_upload_btn.upload(handle_image_upload, inputs=[image_upload_btn, session_image], outputs=[image_upload_msg, image_output])
         image_send_btn.click(handle_text_image, inputs=[image_question, session_image], outputs=[image_output])
         image_reset_btn.click(lambda: (str(uuid.uuid4()), ""), outputs=[session_image, image_output])
         image_download_btn.click(download_pdf_summary, inputs=[session_image], outputs=[image_summary_file])
 if __name__ == "__main__":
-    demo.launch()

 def groq_chat_completion(messages):
     body = {"model": "llama-3.1-8b-instant", "messages": messages}
     try:
+        resp = requests.post("https://api.groq.com/openai/v1/chat/completions", headers=HEADERS, json=body, timeout=60)
         resp.raise_for_status()
         return resp.json()["choices"][0]["message"]["content"]
     except Exception as e:
         {"role": "user", "content": f"PDF chunk:\n{chunk}\n\nQuestion: {question}"}
     ]
     assistant_text = groq_chat_completion(messages)
+    # Prepend the first 200 characters of the PDF chunk so the answer shows the snippet it is based on
     assistant_text = f"**Snippet from PDF:**\n{chunk[:200]}...\n\n**Answer:**\n{assistant_text}"
     if session_id not in SESSION_HISTORY:
         SESSION_HISTORY[session_id] = []
 with gr.Blocks() as demo:
     gr.HTML("""
     <style>
+        /* Change height + width of the audio recorder box */
+        #mic_box audio {
+            height: 50px !important;   /* adjust height */
+            width: 200px !important;    /* adjust width (optional) */
+        }
     </style>
     """)
     gr.Markdown("## 🛠 Multi-Mode AI Assistant (Voice, PDF, Image)")
     session_voice = gr.State(str(uuid.uuid4()))
     session_pdf = gr.State(str(uuid.uuid4()))
     session_image = gr.State(str(uuid.uuid4()))
+    # NOTE: the early pdf_summary_file definition below is disabled; the component is created inside the PDF tab instead
+    #pdf_summary_file = gr.File(label="Download Summary", visible=False)
     with gr.Tab("🎤 Voice Chat"):
+        chat_voice = gr.Chatbot( height=320)
         with gr.Row():
+            mic = gr.Audio(type="filepath",label="🎤 Record Voice (hold & speak)", elem_id="mic_box")
             audio_output = gr.Audio(label="Assistant Voice Output", type="filepath", interactive=False)
             tts_lang = gr.Dropdown(choices=["en", "ur"], value="en", label="TTS Language")
         with gr.Row():
             btn_general = gr.Button("⚡Ask General 🎯")
             btn_pdf = gr.Button("⚡Ask PDF 📄")
             btn_image = gr.Button("⚡Ask Image 🖼")
+            enhancer_toggle = gr.Checkbox(label="Enable Response Enhancer", value=False, scale =1)
+            tone_dropdown = gr.Dropdown(choices=["Helpful", "Formal", "Friendly"], value="Helpful", label="Enhancer Tone", scale =1)
         with gr.Row():
             btn_reset_logs = gr.Button("♻ Reset LOGs")
             btn_download_logs = gr.Button("📥 Download Summary")
+            Voice_summary_file = gr.File(label="📥Download Summary File", interactive=False,scale =1)
+            #btn_general = gr.Button("⚡Ask General 🎯")
+            #btn_pdf = gr.Button("⚡Ask PDF 📄")
+            #btn_image = gr.Button("⚡Ask Image 🖼")
+        #with gr.Row():
+            #text_input = gr.Textbox(label="Or type a question (General)",visible=False)
+            #btn_send_text = gr.Button("Send (Text General)",visible=False)
+            #btn_reset_logs = gr.Button("♻ Reset LOGs")
         answer_voice = gr.Textbox(label="Assistant Answer (text)", lines=2, visible=False)
         btn_general.click(fn=handle_voice_general,
                           outputs=[answer_voice, audio_output, chat_voice])
         btn_pdf.click(fn=handle_voice_pdf, inputs=[mic, session_pdf, tts_lang], outputs=[answer_voice, audio_output, chat_voice])
         btn_image.click(fn=handle_voice_image, inputs=[mic, session_image, tts_lang], outputs=[answer_voice, audio_output, chat_voice])
+       # btn_send_text.click(fn=handle_text_general, inputs=[text_input, session_voice, enhancer_toggle, tone_dropdown], outputs=[answer_voice, chat_voice])
         btn_reset_logs.click(lambda: (str(uuid.uuid4()), [], None, None, ""), outputs=[session_voice, chat_voice, mic, audio_output, answer_voice])
         btn_download_logs.click(download_pdf_summary, inputs=[session_voice], outputs=[Voice_summary_file])
     with gr.Tab("📄 PDF Summarizer"):
         pdf_output = gr.Textbox(label="Answer (Text Only)", lines=5)
         with gr.Row():
+            pdf_upload_btn = gr.File(label="Upload PDF", file_types=[".pdf"], scale=1 )
             pdf_question = gr.Textbox(label="Ask a question about PDF (text)", lines=3)
             pdf_upload_msg = gr.Textbox(label="Upload Status", interactive=False)
         with gr.Row():
             pdf_send_btn = gr.Button("Ask (Questions)")
             pdf_reset_btn = gr.Button("♻ Reset LOGs")
         with gr.Row():
+            pdf_summary_file = gr.File(label="📥Download Summary File", interactive=False,scale =1)
             pdf_download_btn = gr.Button("📥 Download Summary")
         pdf_upload_btn.upload(handle_pdf_upload, inputs=[pdf_upload_btn, session_pdf], outputs=[pdf_upload_msg])
         pdf_send_btn.click(handle_text_pdf, inputs=[pdf_question, session_pdf], outputs=[pdf_output])
         pdf_reset_btn.click(lambda: (str(uuid.uuid4()), ""), outputs=[session_pdf, pdf_output])
         pdf_download_btn.click(download_pdf_summary, inputs=[session_pdf], outputs=[pdf_summary_file])
     with gr.Tab("🖼 Image OCR"):
         image_output = gr.Textbox(label="Answer (Text Only)", lines=5)
         with gr.Row():
+            image_upload_btn = gr.File(label="Upload Image", file_types=[".png", ".jpg", ".jpeg"], scale =1 )
             image_question = gr.Textbox(label="Ask question about Image", lines=3)
             image_upload_msg = gr.Textbox(label="Upload Status", interactive=False)
         with gr.Row():
             image_send_btn = gr.Button("Ask (Questions)")
             image_reset_btn = gr.Button("♻ Reset LOGs")
         with gr.Row():
+            image_summary_file = gr.File(label="📥Download Summary File", interactive=False,scale =1)
             image_download_btn = gr.Button("📥 Download Summary")
         image_upload_btn.upload(handle_image_upload, inputs=[image_upload_btn, session_image], outputs=[image_upload_msg, image_output])
         image_send_btn.click(handle_text_image, inputs=[image_question, session_image], outputs=[image_output])
         image_reset_btn.click(lambda: (str(uuid.uuid4()), ""), outputs=[session_image, image_output])
         image_download_btn.click(download_pdf_summary, inputs=[session_image], outputs=[image_summary_file])
 if __name__ == "__main__":
+    demo.launch()