asad9641 committed on
Commit
10f89e3
·
verified ·
1 Parent(s): 7bf8d4b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +322 -172
app.py CHANGED
@@ -1,220 +1,370 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # ------------------ Gradio UI ------------------
2
  with gr.Blocks() as demo:
3
- # --- THEME CSS: Blue -> Purple Neon with readable text ---
4
  gr.HTML(r"""
5
  <style>
6
- /* PAGE BACKGROUND */
7
  body { background: linear-gradient(135deg, #eef2ff 0%, #f5e8ff 100%) !important; }
8
-
9
- /* ---------------- Tabs ---------------- */
10
- div[data-testid="tab-list"] button,
11
- .tabs button {
12
- background: linear-gradient(90deg, #0ea5e9, #7c3aed);
13
- color: white !important;
14
- font-weight: 700;
15
- border-radius: 12px 12px 0 0;
16
- padding: 10px 18px;
17
- margin-right: 6px;
18
- border: none;
19
- box-shadow: 0 6px 18px rgba(124,58,237,0.18);
20
- transition: transform 0.18s ease, box-shadow 0.18s ease;
21
- }
22
- div[data-testid="tab-list"] button:hover,
23
- .tabs button:hover { transform: translateY(-3px); }
24
-
25
- div[data-testid="tab-list"] button[aria-selected="true"],
26
- .tabs button[aria-selected="true"] {
27
- background: linear-gradient(90deg, #60a5fa, #a78bfa);
28
- box-shadow: 0 8px 24px rgba(99,102,241,0.28);
29
- }
30
-
31
- /* ---------------- Global headers / markdown ---------------- */
32
- .gradio-container h2, .gradio-container h3, .gradio-container h4,
33
- .gradio-container .markdown { color: #1e1e1e; }
34
-
35
- /* Custom title styling */
36
- .app-title {
37
- font-family: 'Segoe UI', Roboto, 'Helvetica Neue', Arial;
38
- font-size: 22px;
39
- color: #1f1f1f;
40
- padding: 10px 14px;
41
- border-radius: 10px;
42
- background: linear-gradient(90deg, rgba(14,165,233,0.08), rgba(124,58,237,0.06));
43
- display: inline-block;
44
- }
45
-
46
- /* ---------------- Buttons ---------------- */
47
- .gr-button, .gradio-button, button {
48
- background: linear-gradient(90deg, #06b6d4, #7c3aed) !important;
49
- color: white !important;
50
- border: none !important;
51
- box-shadow: 0 8px 20px rgba(124,58,237,0.18);
52
- border-radius: 10px !important;
53
- padding: 8px 14px !important;
54
- font-weight: 700 !important;
55
- transition: transform 0.12s ease, box-shadow 0.12s ease;
56
- }
57
- .gr-button:hover, .gradio-button:hover, button:hover { transform: translateY(-3px); }
58
-
59
- /* ---------------- Chat bubbles ---------------- */
60
- .chatbot .message.user { background: #e4e7ff !important; color: #1f1f1f !important; border-radius: 16px 16px 16px 4px; padding: 10px 12px; }
61
- .chatbot .message.assistant { background: #f1e4ff !important; color: #1f1f1f !important; border-radius: 16px 16px 4px 16px; padding: 10px 12px; }
62
-
63
- /* Inputs and boxes readable */
64
- input, textarea, .gr-textbox, .gr-textbox textarea {
65
- background: #ffffff !important;
66
- color: #1e1e1e !important;
67
- border: 1px solid #cfcfcf !important;
68
- border-radius: 8px !important;
69
- }
70
-
71
- /* ---------------- Mic button fixes ---------------- */
72
- #mic_box {
73
- position: relative !important;
74
- display: flex !important;
75
- flex-direction: column !important;
76
- align-items: center !important;
77
- justify-content: center !important;
78
- background: #ffffff !important;
79
- border: 2px solid #d3c7ff !important;
80
- border-radius: 16px !important;
81
- padding: 10px !important;
82
- width: 70px;
83
- height: 70px;
84
- }
85
- #mic_box button {
86
- background: #6d28d9 !important;
87
- border-radius: 50% !important;
88
- width: 60px !important;
89
- height: 60px !important;
90
- display: flex !important;
91
- align-items: center !important;
92
- justify-content: center !important;
93
- border: none !important;
94
- box-shadow: 0 0 10px rgba(109,40,217,0.4) !important;
95
- }
96
- #mic_box button svg {
97
- width: 28px !important;
98
- height: 28px !important;
99
- fill: #ffffff !important;
100
- stroke: #ffffff !important;
101
- }
102
- #mic_box button:hover {
103
- background: #8b5cf6 !important;
104
- box-shadow: 0 0 14px rgba(139,92,246,0.6) !important;
105
- }
106
- #mic_box::after {
107
- content: "Tap to Record";
108
- display: block;
109
- text-align: center;
110
- font-size: 12px;
111
- margin-top: 6px;
112
- color: #4b4b4b;
113
- }
114
  </style>
115
  """)
116
 
117
- # Title area with colorful headline
118
- gr.HTML("""
119
- <div style='display:flex;align-items:center;gap:14px'>
120
- <div class='app-title'>
121
- <strong>🛠 Multi-Mode AI Assistant</strong>
122
- <div style='font-size:14px;color:#4b4b4b'>Voice · PDF · Image — Blue·Purple Neon Theme</div>
123
- </div>
124
- </div>
125
- """)
126
 
127
  session_voice = gr.State(str(uuid.uuid4()))
128
  session_pdf = gr.State(str(uuid.uuid4()))
129
  session_image = gr.State(str(uuid.uuid4()))
130
 
 
131
  with gr.Tab("🎤 Voice Chat"):
132
- gr.HTML("""
133
- <div style='margin-bottom:6px;'>
134
- <h3 style='margin:0;padding:0;color:#1f1f1f'>🎤 Voice Chat — Speak naturally, get voice & text responses</h3>
135
- <p style='margin:2px 0 6px;color:#333;font-size:13px'>
136
- Hold and speak, ask general or knowledge-document questions. Enable enhancer for richer answers.
137
- </p>
138
- </div>
139
- """)
140
  chat_voice = gr.Chatbot(height=320)
141
  with gr.Row():
142
- mic = gr.Audio(type="filepath", label="", elem_id="mic_box")
143
  audio_output = gr.Audio(label="Assistant Voice Output", type="filepath", interactive=False)
144
- tts_lang = gr.Dropdown(choices=["en", "ur"], value="en", label="TTS Language")
145
-
146
  with gr.Row():
147
  btn_general = gr.Button("⚡Ask General 🎯")
148
  btn_pdf = gr.Button("⚡Ask PDF 📄")
149
  btn_image = gr.Button("⚡Ask Image 🖼")
150
- enhancer_toggle = gr.Checkbox(label="Enable Response Enhancer", value=False, scale=1)
151
- tone_dropdown = gr.Dropdown(choices=["Helpful", "Formal", "Friendly"], value="Helpful", label="Enhancer Tone", scale=1)
152
  with gr.Row():
153
- btn_reset_logs = gr.Button("♻ Reset LOGs", elem_id='reset_logs')
154
- btn_download_logs = gr.Button("📥 Download Summary", elem_id='download_logs')
155
  Voice_summary_file = gr.File(label="📥Download Summary File", interactive=False)
156
-
157
  answer_voice = gr.Textbox(label="Assistant Answer (text)", lines=2, visible=False)
158
 
159
- btn_general.click(fn=handle_voice_general,
160
- inputs=[mic, session_voice, tts_lang, enhancer_toggle, tone_dropdown],
161
- outputs=[answer_voice, audio_output, chat_voice])
162
- btn_pdf.click(fn=handle_voice_pdf, inputs=[mic, session_pdf, tts_lang], outputs=[answer_voice, audio_output, chat_voice])
163
- btn_image.click(fn=handle_voice_image, inputs=[mic, session_image, tts_lang], outputs=[answer_voice, audio_output, chat_voice])
164
- btn_reset_logs.click(lambda: (str(uuid.uuid4()), [], None, None, ""), outputs=[session_voice, chat_voice, mic, audio_output, answer_voice])
165
- btn_download_logs.click(download_pdf_summary, inputs=[session_voice], outputs=[Voice_summary_file])
166
 
167
- # PDF Tab
168
  with gr.Tab("📄 PDF Summarizer"):
169
- gr.HTML("""
170
- <div style='margin-bottom:6px;'>
171
- <h3 style='margin:0;padding:0;color:#1f1f1f'>📄 PDF Summarizer — Upload a PDF, ask questions</h3>
172
- <p style='margin:2px 0 6px;color:#333;font-size:13px'>
173
- Uploads are chunked and embedded so you can ask targeted questions about the document.
174
- </p>
175
- </div>
176
- """)
177
  pdf_output = gr.Textbox(label="Answer (Text Only)", lines=5)
178
  with gr.Row():
179
- pdf_upload_btn = gr.File(label="Upload PDF", file_types=[".pdf"], scale=1)
180
  pdf_question = gr.Textbox(label="Ask a question about PDF (text)", lines=3)
181
  pdf_upload_msg = gr.Textbox(label="Upload Status", interactive=False)
182
  with gr.Row():
183
  pdf_send_btn = gr.Button("Ask (Questions)")
184
  pdf_reset_btn = gr.Button("♻ Reset LOGs")
185
  with gr.Row():
186
- pdf_summary_file = gr.File(label="📥Download Summary File", interactive=False, scale=1)
187
  pdf_download_btn = gr.Button("📥 Download Summary")
188
- pdf_upload_btn.upload(handle_pdf_upload, inputs=[pdf_upload_btn, session_pdf], outputs=[pdf_upload_msg])
189
- pdf_send_btn.click(handle_text_pdf, inputs=[pdf_question, session_pdf], outputs=[pdf_output])
190
- pdf_reset_btn.click(lambda: (str(uuid.uuid4()), ""), outputs=[session_pdf, pdf_output])
191
- pdf_download_btn.click(download_pdf_summary, inputs=[session_pdf], outputs=[pdf_summary_file])
192
 
193
- # Image Tab
194
  with gr.Tab("🖼 Image OCR"):
195
- gr.HTML("""
196
- <div style='margin-bottom:6px;'>
197
- <h3 style='margin:0;padding:0;color:#1f1f1f'>🖼 Image OCR — Extract text from images</h3>
198
- <p style='margin:2px 0 6px;color:#333;font-size:13px'>
199
- Upload an image, OCR runs, then ask questions about the extracted text.
200
- </p>
201
- </div>
202
- """)
203
  image_output = gr.Textbox(label="Answer (Text Only)", lines=5)
204
  with gr.Row():
205
- image_upload_btn = gr.File(label="Upload Image", file_types=[".png", ".jpg", ".jpeg"], scale=1)
206
  image_question = gr.Textbox(label="Ask question about Image", lines=3)
207
  image_upload_msg = gr.Textbox(label="Upload Status", interactive=False)
208
  with gr.Row():
209
  image_send_btn = gr.Button("Ask (Questions)")
210
  image_reset_btn = gr.Button("♻ Reset LOGs")
211
  with gr.Row():
212
- image_summary_file = gr.File(label="📥Download Summary File", interactive=False, scale=1)
213
  image_download_btn = gr.Button("📥 Download Summary")
214
- image_upload_btn.upload(handle_image_upload, inputs=[image_upload_btn, session_image], outputs=[image_upload_msg, image_output])
215
- image_send_btn.click(handle_text_image, inputs=[image_question, session_image], outputs=[image_output])
216
- image_reset_btn.click(lambda: (str(uuid.uuid4()), ""), outputs=[session_image, image_output])
217
- image_download_btn.click(download_pdf_summary, inputs=[session_image], outputs=[image_summary_file])
218
 
219
- if __name__ == "__main__":
220
- demo.launch()
 
1
+ # app.py
2
+ """
3
+ Multi-Mode AI Assistant (Voice, PDF, Image) with Wow-Factor Features
4
+ - Preserves original functionality
5
+ - UI: Blue → Purple Neon Theme
6
+ - Fixes mic button, readable text, input placeholders
7
+ """
8
+ import os
9
+ import uuid
10
+ import tempfile
11
+ import requests
12
+ from dotenv import load_dotenv
13
+ from gtts import gTTS
14
+ from PyPDF2 import PdfReader
15
+ import gradio as gr
16
+ from sentence_transformers import SentenceTransformer, util
17
+ from fpdf import FPDF
18
+ from datetime import datetime
19
+
20
+ # ------------------ Load API Keys ------------------
21
+ load_dotenv()
22
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY", "").strip()
23
+ OCR_SPACE_API_KEY = os.getenv("OCR_SPACE_API_KEY", "").strip()
24
+
25
+ if not GROQ_API_KEY:
26
+ raise ValueError("❌ GROQ_API_KEY missing. Set it in env / Hugging Face Secrets.")
27
+ if not OCR_SPACE_API_KEY:
28
+ raise ValueError("❌ OCR_SPACE_API_KEY missing. Set it in env / Hugging Face Secrets.")
29
+
30
+ HEADERS = {"Authorization": f"Bearer {GROQ_API_KEY}"}
31
+
32
+ # ------------------ Global State ------------------
33
+ SESSION_HISTORY = {}
34
+ CHAT_DISPLAY = {}
35
+ PDF_CONTENT = {}
36
+ PDF_EMBEDS = {}
37
+ IMAGE_TEXT = {}
38
+ IMAGE_EMBEDS = {}
39
+ CHUNK_SIZE = 1500
40
+
41
+ # Load embedding model
42
+ embed_model = SentenceTransformer("all-MiniLM-L6-v2")
43
+
44
+ # ------------------ Helpers ------------------
45
def _get_path_from_gr_file(gr_file):
    """Resolve a Gradio file value (path string, file-like object, or dict
    payload) to an existing filesystem path, or None if nothing usable."""
    if not gr_file:
        return None
    # Plain string path (Gradio "filepath" mode).
    if isinstance(gr_file, str):
        return gr_file if os.path.exists(gr_file) else None
    # Tempfile-like object exposing a .name attribute.
    try:
        candidate = getattr(gr_file, "name", None)
        if candidate and os.path.exists(candidate):
            return candidate
    except Exception:
        pass
    # Dict payloads emitted by some Gradio versions.
    if isinstance(gr_file, dict):
        for key in ("name", "file_name", "filepath"):
            value = gr_file.get(key)
            if isinstance(value, str) and os.path.exists(value):
                return value
    return None
62
+
63
def chunk_text(text, size=CHUNK_SIZE):
    """Split *text* into consecutive, non-overlapping slices of at most *size* characters."""
    offsets = range(0, len(text), size)
    return [text[start:start + size] for start in offsets]
65
+
66
def synthesize_speech(text, lang="en"):
    """Render *text* to speech with gTTS.

    Returns the path of a temporary MP3 file, or None when *text* is empty
    or synthesis fails (failures are logged, never raised).
    """
    if not text:
        return None
    try:
        out = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
        speech = gTTS(text=text, lang=lang)
        speech.save(out.name)
        return out.name
    except Exception as exc:
        print("TTS error:", exc)
        return None
76
+
77
def select_relevant_chunk(question, chunks, chunk_embeds):
    """Return the chunk whose embedding is most cosine-similar to *question*.

    Returns "" when there are no chunks or no embeddings to search.
    """
    if not chunks or chunk_embeds is None:
        return ""
    question_emb = embed_model.encode(question, convert_to_tensor=True)
    similarities = util.cos_sim(question_emb, chunk_embeds)[0]
    best_index = int(similarities.argmax().item())
    return chunks[best_index]
84
+
85
def _chat_display_to_messages(chat_display):
    """Flatten (user, assistant) pairs into role/content dicts.

    NOTE(review): this emits the "messages" chat format; confirm the Chatbot
    widget is configured to accept dicts for the installed Gradio version.
    """
    messages = []
    for user_text, assistant_text in chat_display:
        messages.extend((
            {"role": "user", "content": user_text},
            {"role": "assistant", "content": assistant_text},
        ))
    return messages
91
+
92
+ # ------------------ Transcription & LLM ------------------
93
# ------------------ Transcription & LLM ------------------
def transcribe_audio(audio_path):
    """Send an audio file to Groq's Whisper endpoint and return the transcript.

    On any failure a human-readable error string is returned instead of
    raising, so callers can surface it directly in the UI.
    """
    if not audio_path or not os.path.exists(audio_path):
        return "Error: audio file missing."
    endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
    try:
        with open(audio_path, "rb") as fh:
            # NOTE(review): content type is always audio/wav even for other
            # recorded formats — the API appears tolerant, but confirm.
            upload = {"file": (os.path.basename(audio_path), fh, "audio/wav")}
            payload = {"model": "whisper-large-v3"}
            response = requests.post(endpoint, headers=HEADERS, files=upload, data=payload, timeout=60)
        response.raise_for_status()
        return response.json().get("text", "") or ""
    except Exception as exc:
        print("transcription error:", exc)
        return f"Error transcribing audio: {exc}"
107
+
108
def groq_chat_completion(messages):
    """Call Groq's OpenAI-compatible chat-completions endpoint.

    Parameters
    ----------
    messages : list[dict]
        Chat turns in ``{"role", "content"}`` form.

    Returns
    -------
    str
        The assistant's reply, or an ``"Error generating response: ..."``
        string on failure (callers display this rather than catch it).
    """
    # FIX: URL host was typo'd as "https://api/groq.com/..." (slash instead
    # of a dot), which made every chat request fail with a DNS error.
    url = "https://api.groq.com/openai/v1/chat/completions"
    body = {"model": "llama-3.1-8b-instant", "messages": messages}
    try:
        resp = requests.post(url, headers=HEADERS, json=body, timeout=60)
        resp.raise_for_status()
        return resp.json()["choices"][0]["message"]["content"]
    except Exception as e:
        print("groq_chat_completion error:", e)
        return f"Error generating response: {e}"
117
+
118
def generate_response(session_id, user_text, enhancer_enabled=False, enhancer_tone="Helpful"):
    """Run one general-chat turn, persisting both sides in SESSION_HISTORY."""
    history = SESSION_HISTORY.setdefault(session_id, [])
    history.append({"role": "user", "content": user_text})

    messages = [{"role": "system", "content": "You are a helpful AI assistant."}]
    messages.extend(history)
    if enhancer_enabled:
        # Extra steering message sent to the model only; deliberately not
        # stored in the persistent history.
        messages.append({"role": "user", "content": f"Enhance response. Tone: {enhancer_tone}. Question: {user_text}"})

    assistant_text = groq_chat_completion(messages)
    history.append({"role": "assistant", "content": assistant_text})
    return assistant_text
131
+
132
+ # ------------------ PDF handling ------------------
133
# ------------------ PDF handling ------------------
def handle_pdf_upload(pdf_file, session_id):
    """Extract, chunk, and embed an uploaded PDF; returns a status string."""
    path = _get_path_from_gr_file(pdf_file)
    if not path:
        return "No file uploaded or file unreadable."
    try:
        reader = PdfReader(path)
        # extract_text() may return None for image-only pages; keep the
        # original per-page "\n" separator.
        text = "".join((page.extract_text() or "") + "\n" for page in reader.pages)
        if not text.strip():
            return "No extractable content found in PDF."
        chunks = chunk_text(text)
        PDF_CONTENT[session_id] = chunks
        PDF_EMBEDS[session_id] = embed_model.encode(chunks, convert_to_tensor=True)
        return f"PDF processed: {len(chunks)} chunks ready."
    except Exception as e:
        print("PDF upload error:", e)
        return f"Error processing PDF: {e}"
151
+
152
def handle_pdf_question(question, session_id):
    """Answer *question* against the session's uploaded PDF chunks.

    Prepends a 200-char snippet of the retrieved chunk to the model's answer
    and records the combined text in SESSION_HISTORY.
    """
    if session_id not in PDF_CONTENT:
        return "Document not found. Upload first."
    chunk = select_relevant_chunk(question, PDF_CONTENT[session_id], PDF_EMBEDS[session_id])
    prompt = [
        {"role": "system", "content": "You are a helpful assistant summarizing PDF content."},
        {"role": "user", "content": f"PDF chunk:\n{chunk}\n\nQuestion: {question}"},
    ]
    raw_answer = groq_chat_completion(prompt)
    assistant_text = f"**Snippet from PDF:**\n{chunk[:200]}...\n\n**Answer:**\n{raw_answer}"
    SESSION_HISTORY.setdefault(session_id, []).append(
        {"role": "assistant", "content": assistant_text}
    )
    return assistant_text
166
+
167
+ # ------------------ Image OCR ------------------
168
# ------------------ Image OCR ------------------
def ocr_space_file(image_path, api_key, language="eng"):
    """OCR an image via the OCR.space REST API.

    Returns the extracted text, or "" on any failure (missing file, HTTP
    error, or an API-side processing error — all logged, never raised).
    """
    if not image_path or not os.path.exists(image_path):
        return ""
    try:
        with open(image_path, "rb") as fh:
            response = requests.post(
                "https://api.ocr.space/parse/image",
                files={"file": fh},
                data={"apikey": api_key, "language": language},
                timeout=60,
            )
            response.raise_for_status()
            result = response.json()
        if result.get("IsErroredOnProcessing"):
            print("OCR.space processing error:", result)
            return ""
        texts = [item.get("ParsedText", "") for item in result.get("ParsedResults", [])]
        return "\n".join(texts)
    except Exception as exc:
        print("ocr_space_file error:", exc)
        return ""
186
+
187
def handle_image_upload(image_file, session_id):
    """OCR an uploaded image, then chunk and embed the extracted text.

    Returns (status_message, "") — the second element clears/feeds the
    answer textbox in the UI.
    """
    path = _get_path_from_gr_file(image_file)
    if not path:
        return "No image uploaded or file unreadable.", ""
    extracted = ocr_space_file(path, OCR_SPACE_API_KEY)
    if not extracted.strip():
        return "No extractable text found in the image.", ""
    chunks = chunk_text(extracted)
    IMAGE_TEXT[session_id] = chunks
    IMAGE_EMBEDS[session_id] = embed_model.encode(chunks, convert_to_tensor=True)
    return f"Image processed: {len(chunks)} chunks ready.", ""
198
+
199
def handle_image_question(question, session_id):
    """Answer *question* against the session's OCR-extracted image text.

    Mirrors handle_pdf_question: retrieve the best chunk, ask the model,
    prepend a snippet, and record the result in SESSION_HISTORY.
    """
    if session_id not in IMAGE_TEXT:
        return "Image not found. Upload first."
    chunk = select_relevant_chunk(question, IMAGE_TEXT[session_id], IMAGE_EMBEDS[session_id])
    prompt = [
        {"role": "system", "content": "You are a helpful assistant summarizing image text."},
        {"role": "user", "content": f"Image chunk:\n{chunk}\n\nQuestion: {question}"},
    ]
    raw_answer = groq_chat_completion(prompt)
    assistant_text = f"**Snippet from Image:**\n{chunk[:200]}...\n\n**Answer:**\n{raw_answer}"
    SESSION_HISTORY.setdefault(session_id, []).append(
        {"role": "assistant", "content": assistant_text}
    )
    return assistant_text
213
+
214
+ # ------------------ PDF Generation ------------------
215
# ------------------ PDF Generation ------------------
def generate_pdf_file(text, filename_prefix="summary"):
    """Write *text* into a timestamped PDF and return the file path.

    Fixes over the previous version:
    - output directory uses tempfile.gettempdir() instead of hard-coded
      "/tmp", which does not exist on Windows hosts;
    - each line is coerced to Latin-1 with replacement characters, because
      FPDF's built-in Arial font only supports Latin-1 and would raise on
      the emoji / non-Latin text that appears in chat summaries.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.set_font("Arial", "B", size=14)
    pdf.multi_cell(0, 8, f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n")
    pdf.set_font("Arial", size=12)
    for line in text.split("\n"):
        safe_line = line.encode("latin-1", "replace").decode("latin-1")
        pdf.multi_cell(0, 6, safe_line)
    file_path = os.path.join(tempfile.gettempdir(), f"{filename_prefix}_{uuid.uuid4()}.pdf")
    pdf.output(file_path)
    return file_path
227
+
228
def download_pdf_summary(session_id):
    """Bundle every assistant reply for *session_id* into a downloadable PDF."""
    assistant_turns = [
        message["content"]
        for message in SESSION_HISTORY.get(session_id, [])
        if message["role"] == "assistant"
    ]
    summary_text = "\n".join(assistant_turns) or "No summary available."
    return generate_pdf_file(summary_text, "summary")
233
+
234
+ # ------------------ Voice & Chat Handlers ------------------
235
# ------------------ Voice & Chat Handlers ------------------
def _append_chat_display(session_id, user_text, assistant_text):
    """Record one (user, assistant) exchange for the chat widget."""
    CHAT_DISPLAY.setdefault(session_id, []).append((user_text, assistant_text))
239
+
240
def handle_voice_general(audio_file, session_id, tts_lang="en", enhancer_enabled=False, enhancer_tone="Helpful"):
    """Voice turn against the general chat model: transcribe -> LLM -> TTS.

    Returns (answer_text, tts_audio_path_or_None, chat_messages).
    """
    path = _get_path_from_gr_file(audio_file)
    if not path:
        return "No audio provided.", None, []
    user_text = transcribe_audio(path)
    assistant_text = generate_response(session_id, user_text, enhancer_enabled, enhancer_tone)
    _append_chat_display(session_id, user_text, assistant_text)
    spoken = synthesize_speech(assistant_text, lang=tts_lang)
    chat = _chat_display_to_messages(CHAT_DISPLAY[session_id])
    return assistant_text, spoken, chat
249
+
250
def handle_voice_pdf(audio_file, session_id, tts_lang="en"):
    """Voice turn against the uploaded PDF: transcribe -> retrieve+answer -> TTS.

    Returns (answer_text, tts_audio_path_or_None, chat_messages).
    """
    path = _get_path_from_gr_file(audio_file)
    if not path:
        return "No audio provided.", None, []
    user_text = transcribe_audio(path)
    assistant_text = handle_pdf_question(user_text, session_id)
    _append_chat_display(session_id, user_text, assistant_text)
    spoken = synthesize_speech(assistant_text, lang=tts_lang)
    chat = _chat_display_to_messages(CHAT_DISPLAY[session_id])
    return assistant_text, spoken, chat
259
+
260
def handle_voice_image(audio_file, session_id, tts_lang="en"):
    """Voice turn against the OCR'd image text: transcribe -> retrieve+answer -> TTS.

    Returns (answer_text, tts_audio_path_or_None, chat_messages).
    """
    path = _get_path_from_gr_file(audio_file)
    if not path:
        return "No audio provided.", None, []
    user_text = transcribe_audio(path)
    assistant_text = handle_image_question(user_text, session_id)
    _append_chat_display(session_id, user_text, assistant_text)
    spoken = synthesize_speech(assistant_text, lang=tts_lang)
    chat = _chat_display_to_messages(CHAT_DISPLAY[session_id])
    return assistant_text, spoken, chat
269
+
270
def handle_text_general(user_text, session_id, enhancer_enabled=False, enhancer_tone="Helpful"):
    """Text-only general chat turn; returns (answer_text, chat_messages)."""
    assistant_text = generate_response(session_id, user_text, enhancer_enabled, enhancer_tone)
    _append_chat_display(session_id, user_text, assistant_text)
    chat = _chat_display_to_messages(CHAT_DISPLAY[session_id])
    return assistant_text, chat
274
+
275
def handle_text_pdf(question, session_id):
    """Text-entry variant of PDF Q&A; shares session state with the voice path."""
    return handle_pdf_question(question, session_id)
277
+
278
def handle_text_image(question, session_id):
    """Text-entry variant of image Q&A; shares session state with the voice path."""
    return handle_image_question(question, session_id)
280
+
281
  # ------------------ Gradio UI ------------------
282
  with gr.Blocks() as demo:
283
+ # ---- Theme CSS ----
284
  gr.HTML(r"""
285
  <style>
 
286
  body { background: linear-gradient(135deg, #eef2ff 0%, #f5e8ff 100%) !important; }
287
+ .app-title, .gradio-container h3, .gradio-container h4, .gradio-container .markdown { color: #1f1f1f !important; }
288
+ #mic_box button { background:#6d28d9; width:60px;height:60px;border-radius:50%; display:flex; align-items:center; justify-content:center; }
289
+ #mic_box button svg { fill:#fff; stroke:#fff; width:28px;height:28px; }
290
+ #mic_box::after { content:"Tap to Record"; display:block; text-align:center; font-size:12px; margin-top:6px; color:#4b4b4b; }
291
+ input, textarea, .gr-textbox { background:#fff; color:#1e1e1e; border:1px solid #cfcfcf; border-radius:8px; }
292
+ .gr-chatbot .message.user { background:#e4e7ff; color:#1f1f1f; }
293
+ .gr-chatbot .message.assistant { background:#f1e4ff; color:#1f1f1f; }
294
+ .gr-button { color:#fff; font-weight:600; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
  </style>
296
  """)
297
 
298
+ # ---- Title ----
299
+ gr.HTML("""<div style='display:flex;align-items:center;gap:14px'>
300
+ <div class='app-title'><strong>🛠 Multi-Mode AI Assistant</strong>
301
+ <div style='font-size:13px;color:#4b4b4b'>Voice · PDF · Image — Blue·Purple Neon Theme</div></div>
302
+ </div>""")
 
 
 
 
303
 
304
  session_voice = gr.State(str(uuid.uuid4()))
305
  session_pdf = gr.State(str(uuid.uuid4()))
306
  session_image = gr.State(str(uuid.uuid4()))
307
 
308
+ # ---- Voice Chat Tab ----
309
  with gr.Tab("🎤 Voice Chat"):
 
 
 
 
 
 
 
 
310
  chat_voice = gr.Chatbot(height=320)
311
  with gr.Row():
312
+ mic = gr.Audio(type="filepath", label="🎤 Record Voice", elem_id="mic_box")
313
  audio_output = gr.Audio(label="Assistant Voice Output", type="filepath", interactive=False)
314
+ tts_lang = gr.Dropdown(choices=["en","ur"], value="en", label="TTS Language")
 
315
  with gr.Row():
316
  btn_general = gr.Button("⚡Ask General 🎯")
317
  btn_pdf = gr.Button("⚡Ask PDF 📄")
318
  btn_image = gr.Button("⚡Ask Image 🖼")
319
+ enhancer_toggle = gr.Checkbox(label="Enable Response Enhancer", value=False)
320
+ tone_dropdown = gr.Dropdown(choices=["Helpful","Formal","Friendly"], value="Helpful", label="Enhancer Tone")
321
  with gr.Row():
322
+ btn_reset_logs = gr.Button("♻ Reset LOGs")
323
+ btn_download_logs = gr.Button("📥 Download Summary")
324
  Voice_summary_file = gr.File(label="📥Download Summary File", interactive=False)
 
325
  answer_voice = gr.Textbox(label="Assistant Answer (text)", lines=2, visible=False)
326
 
327
+ btn_general.click(handle_voice_general, [mic, session_voice, tts_lang, enhancer_toggle, tone_dropdown],
328
+ [answer_voice, audio_output, chat_voice])
329
+ btn_pdf.click(handle_voice_pdf, [mic, session_pdf, tts_lang], [answer_voice, audio_output, chat_voice])
330
+ btn_image.click(handle_voice_image, [mic, session_image, tts_lang], [answer_voice, audio_output, chat_voice])
331
+ btn_reset_logs.click(lambda: (str(uuid.uuid4()), [], None, None, ""), [session_voice, chat_voice, mic, audio_output, answer_voice])
332
+ btn_download_logs.click(download_pdf_summary, [session_voice], [Voice_summary_file])
 
333
 
334
+ # ---- PDF Tab ----
335
  with gr.Tab("📄 PDF Summarizer"):
 
 
 
 
 
 
 
 
336
  pdf_output = gr.Textbox(label="Answer (Text Only)", lines=5)
337
  with gr.Row():
338
+ pdf_upload_btn = gr.File(label="Upload PDF", file_types=[".pdf"])
339
  pdf_question = gr.Textbox(label="Ask a question about PDF (text)", lines=3)
340
  pdf_upload_msg = gr.Textbox(label="Upload Status", interactive=False)
341
  with gr.Row():
342
  pdf_send_btn = gr.Button("Ask (Questions)")
343
  pdf_reset_btn = gr.Button("♻ Reset LOGs")
344
  with gr.Row():
345
+ pdf_summary_file = gr.File(label="📥Download Summary File", interactive=False)
346
  pdf_download_btn = gr.Button("📥 Download Summary")
347
+ pdf_upload_btn.upload(handle_pdf_upload, [pdf_upload_btn, session_pdf], [pdf_upload_msg])
348
+ pdf_send_btn.click(handle_text_pdf, [pdf_question, session_pdf], [pdf_output])
349
+ pdf_reset_btn.click(lambda: (str(uuid.uuid4()), ""), [session_pdf, pdf_output])
350
+ pdf_download_btn.click(download_pdf_summary, [session_pdf], [pdf_summary_file])
351
 
352
+ # ---- Image Tab ----
353
  with gr.Tab("🖼 Image OCR"):
 
 
 
 
 
 
 
 
354
  image_output = gr.Textbox(label="Answer (Text Only)", lines=5)
355
  with gr.Row():
356
+ image_upload_btn = gr.File(label="Upload Image", file_types=[".png",".jpg",".jpeg"])
357
  image_question = gr.Textbox(label="Ask question about Image", lines=3)
358
  image_upload_msg = gr.Textbox(label="Upload Status", interactive=False)
359
  with gr.Row():
360
  image_send_btn = gr.Button("Ask (Questions)")
361
  image_reset_btn = gr.Button("♻ Reset LOGs")
362
  with gr.Row():
363
+ image_summary_file = gr.File(label="📥Download Summary File", interactive=False)
364
  image_download_btn = gr.Button("📥 Download Summary")
365
+ image_upload_btn.upload(handle_image_upload, [image_upload_btn, session_image], [image_upload_msg])
366
+ image_send_btn.click(handle_text_image, [image_question, session_image], [image_output])
367
+ image_reset_btn.click(lambda: (str(uuid.uuid4()), ""), [session_image, image_output])
368
+ image_download_btn.click(download_pdf_summary, [session_image], [image_summary_file])
369
 
370
+ demo.launch()