asad9641 committed
Commit ec41711 · verified · 1 Parent(s): 0be9386

Update app.py

Files changed (1):
  1. app.py +251 -299
app.py CHANGED
@@ -1,10 +1,8 @@
# app.py
"""
- Multi-Mode AI Assistant (Voice, PDF, Image) with Wow-Factor Features
- - Preserves original features
- - Adds snippet highlighting, cross-modal memory, styled PDF generation
- - Live waveform placeholder for voice input
- - Modular & Hugging Face safe
"""
import os
import uuid
@@ -18,16 +16,12 @@ from sentence_transformers import SentenceTransformer, util
from fpdf import FPDF
from datetime import datetime

- # ------------------ Load API Keys ------------------
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "").strip()
OCR_SPACE_API_KEY = os.getenv("OCR_SPACE_API_KEY", "").strip()
-
- if not GROQ_API_KEY:
-     raise ValueError("❌ GROQ_API_KEY missing. Set it in env / Hugging Face Secrets.")
- if not OCR_SPACE_API_KEY:
-     raise ValueError("❌ OCR_SPACE_API_KEY missing. Set it in env / Hugging Face Secrets.")
-
HEADERS = {"Authorization": f"Bearer {GROQ_API_KEY}"}

# ------------------ Global State ------------------
@@ -38,339 +32,297 @@ PDF_EMBEDS = {}
IMAGE_TEXT = {}
IMAGE_EMBEDS = {}
CHUNK_SIZE = 1500
-
- # Load embedding model
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

# ------------------ Helpers ------------------
def _get_path_from_gr_file(gr_file):
-     if not gr_file:
-         return None
-     if isinstance(gr_file, str) and os.path.exists(gr_file):
-         return gr_file
    try:
-         if hasattr(gr_file, "name") and os.path.exists(gr_file.name):
-             return gr_file.name
-     except Exception:
-         pass
-     if isinstance(gr_file, dict):
-         for key in ("name", "file_name", "filepath"):
            if key in gr_file:
-                 candidate = gr_file.get(key)
-                 if isinstance(candidate, str) and os.path.exists(candidate):
-                     return candidate
    return None

- def chunk_text(text, size=CHUNK_SIZE):
-     return [text[i:i + size] for i in range(0, len(text), size)]

- def synthesize_speech(text, lang="en"):
-     try:
-         if not text:
-             return None
-         tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
-         gTTS(text=text, lang=lang).save(tmp.name)
-         return tmp.name
-     except Exception as e:
-         print("TTS error:", e)
-         return None
-
- def select_relevant_chunk(question, chunks, chunk_embeds):
-     if not chunks or chunk_embeds is None:
-         return ""
-     q_emb = embed_model.encode(question, convert_to_tensor=True)
-     scores = util.cos_sim(q_emb, chunk_embeds)[0]
-     top_idx = int(scores.argmax().item())
    return chunks[top_idx]

def _chat_display_to_messages(chat_display):
-     msgs = []
-     for user, assistant in chat_display:
-         msgs.append({"role": "user", "content": user})
-         msgs.append({"role": "assistant", "content": assistant})
    return msgs

- # ------------------ Transcription & LLM ------------------
def transcribe_audio(audio_path):
    if not audio_path or not os.path.exists(audio_path):
        return "Error: audio file missing."
    try:
-         url = "https://api.groq.com/openai/v1/audio/transcriptions"
-         with open(audio_path, "rb") as f:
-             files = {"file": (os.path.basename(audio_path), f, "audio/wav")}
-             data = {"model": "whisper-large-v3"}
-             resp = requests.post(url, headers=HEADERS, files=files, data=data, timeout=60)
        resp.raise_for_status()
-         return resp.json().get("text", "") or ""
    except Exception as e:
-         print("transcription error:", e)
        return f"Error transcribing audio: {e}"

def groq_chat_completion(messages):
-     body = {"model": "llama-3.1-8b-instant", "messages": messages}
    try:
-         resp = requests.post("https://api.groq.com/openai/v1/chat/completions", headers=HEADERS, json=body, timeout=60)
        resp.raise_for_status()
        return resp.json()["choices"][0]["message"]["content"]
    except Exception as e:
-         print("groq_chat_completion error:", e)
        return f"Error generating response: {e}"

- def generate_response(session_id, user_text, enhancer_enabled=False, enhancer_tone="Helpful"):
-     if session_id not in SESSION_HISTORY:
-         SESSION_HISTORY[session_id] = []
-
-     SESSION_HISTORY[session_id].append({"role": "user", "content": user_text})
-     messages = [{"role": "system", "content": "You are a helpful AI assistant."}] + SESSION_HISTORY[session_id]
-
    if enhancer_enabled:
-         messages.append({"role": "user", "content": f"Enhance response. Tone: {enhancer_tone}. Question: {user_text}"})
-
-     assistant_text = groq_chat_completion(messages)
-     SESSION_HISTORY[session_id].append({"role": "assistant", "content": assistant_text})
    return assistant_text

- # ------------------ PDF handling ------------------
- def handle_pdf_upload(pdf_file, session_id):
-     path = _get_path_from_gr_file(pdf_file)
-     if not path:
-         return "No file uploaded or file unreadable."
    try:
-         reader = PdfReader(path)
-         text = ""
-         for page in reader.pages:
-             text += (page.extract_text() or "") + "\n"
-         if not text.strip():
-             return "No extractable content found in PDF."
-         chunks = chunk_text(text)
-         PDF_CONTENT[session_id] = chunks
-         PDF_EMBEDS[session_id] = embed_model.encode(chunks, convert_to_tensor=True)
-         return f"PDF processed: {len(chunks)} chunks ready."
-     except Exception as e:
-         print("PDF upload error:", e)
-         return f"Error processing PDF: {e}"
-
- def handle_pdf_question(question, session_id):
-     if session_id not in PDF_CONTENT:
-         return "Document not found. Upload first."
-     chunk = select_relevant_chunk(question, PDF_CONTENT[session_id], PDF_EMBEDS[session_id])
-     messages = [
-         {"role": "system", "content": "You are a helpful assistant summarizing PDF content."},
-         {"role": "user", "content": f"PDF chunk:\n{chunk}\n\nQuestion: {question}"}
-     ]
-     assistant_text = groq_chat_completion(messages)
-     # Add snippet highlighting for wow factor
-     assistant_text = f"**Snippet from PDF:**\n{chunk[:200]}...\n\n**Answer:**\n{assistant_text}"
-     if session_id not in SESSION_HISTORY:
-         SESSION_HISTORY[session_id] = []
-     SESSION_HISTORY[session_id].append({"role": "assistant", "content": assistant_text})
    return assistant_text

# ------------------ Image OCR ------------------
- def ocr_space_file(image_path, api_key, language="eng"):
-     if not image_path or not os.path.exists(image_path):
-         return ""
    try:
-         with open(image_path, "rb") as f:
-             payload = {"apikey": api_key, "language": language}
-             files = {"file": f}
-             r = requests.post("https://api.ocr.space/parse/image", files=files, data=payload, timeout=60)
        r.raise_for_status()
-         j = r.json()
-         if j.get("IsErroredOnProcessing"):
-             print("OCR.space processing error:", j)
-             return ""
-         parsed = [pr.get("ParsedText", "") for pr in j.get("ParsedResults", [])]
        return "\n".join(parsed)
-     except Exception as e:
-         print("ocr_space_file error:", e)
-         return ""
-
- def handle_image_upload(image_file, session_id):
-     path = _get_path_from_gr_file(image_file)
-     if not path:
-         return "No image uploaded or file unreadable.", ""
-     parsed = ocr_space_file(path, OCR_SPACE_API_KEY)
-     if not parsed.strip():
-         return "No extractable text found in the image.", ""
-     chunks = chunk_text(parsed)
-     IMAGE_TEXT[session_id] = chunks
-     IMAGE_EMBEDS[session_id] = embed_model.encode(chunks, convert_to_tensor=True)
-     return f"Image processed: {len(chunks)} chunks ready.", ""
-
- def handle_image_question(question, session_id):
-     if session_id not in IMAGE_TEXT:
-         return "Image not found. Upload first."
-     chunk = select_relevant_chunk(question, IMAGE_TEXT[session_id], IMAGE_EMBEDS[session_id])
-     messages = [
-         {"role": "system", "content": "You are a helpful assistant summarizing image text."},
-         {"role": "user", "content": f"Image chunk:\n{chunk}\n\nQuestion: {question}"}
-     ]
-     assistant_text = groq_chat_completion(messages)
-     assistant_text = f"**Snippet from Image:**\n{chunk[:200]}...\n\n**Answer:**\n{assistant_text}"
-     if session_id not in SESSION_HISTORY:
-         SESSION_HISTORY[session_id] = []
-     SESSION_HISTORY[session_id].append({"role": "assistant", "content": assistant_text})
    return assistant_text

- # ------------------ PDF Generation ------------------
- def generate_pdf_file(text, filename_prefix="summary"):
-     pdf = FPDF()
    pdf.add_page()
-     pdf.set_auto_page_break(auto=True, margin=15)
-     pdf.set_font("Arial", "B", size=14)
-     pdf.multi_cell(0, 8, f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n")
-     pdf.set_font("Arial", size=12)
-     for line in text.split("\n"):
-         pdf.multi_cell(0, 6, line)
-     file_path = f"/tmp/{filename_prefix}_{uuid.uuid4()}.pdf"
-     pdf.output(file_path)
-     return file_path

def download_pdf_summary(session_id):
-     summary_text = "\n".join([m["content"] for m in SESSION_HISTORY.get(session_id, []) if m["role"]=="assistant"])
-     if not summary_text:
-         summary_text = "No summary available."
-     return generate_pdf_file(summary_text, "summary")
-
- # ------------------ Voice & Chat Handlers ------------------
- def _append_chat_display(session_id, user_text, assistant_text):
-     if session_id not in CHAT_DISPLAY:
-         CHAT_DISPLAY[session_id] = []
-     CHAT_DISPLAY[session_id].append((user_text, assistant_text))
-
- def handle_voice_general(audio_file, session_id, tts_lang="en", enhancer_enabled=False, enhancer_tone="Helpful"):
-     path = _get_path_from_gr_file(audio_file)
-     if not path:
-         return "No audio provided.", None, []
-     user_text = transcribe_audio(path)
-     assistant_text = generate_response(session_id, user_text, enhancer_enabled, enhancer_tone)
-     _append_chat_display(session_id, user_text, assistant_text)
-     audio_path = synthesize_speech(assistant_text, lang=tts_lang)
-     return assistant_text, audio_path, _chat_display_to_messages(CHAT_DISPLAY[session_id])
-
- def handle_voice_pdf(audio_file, session_id, tts_lang="en"):
-     path = _get_path_from_gr_file(audio_file)
-     if not path:
-         return "No audio provided.", None, []
-     user_text = transcribe_audio(path)
-     assistant_text = handle_pdf_question(user_text, session_id)
-     _append_chat_display(session_id, user_text, assistant_text)
-     audio_path = synthesize_speech(assistant_text, lang=tts_lang)
-     return assistant_text, audio_path, _chat_display_to_messages(CHAT_DISPLAY[session_id])
-
- def handle_voice_image(audio_file, session_id, tts_lang="en"):
-     path = _get_path_from_gr_file(audio_file)
-     if not path:
-         return "No audio provided.", None, []
-     user_text = transcribe_audio(path)
-     assistant_text = handle_image_question(user_text, session_id)
-     _append_chat_display(session_id, user_text, assistant_text)
-     audio_path = synthesize_speech(assistant_text, lang=tts_lang)
-     return assistant_text, audio_path, _chat_display_to_messages(CHAT_DISPLAY[session_id])
-
- def handle_text_general(user_text, session_id, enhancer_enabled=False, enhancer_tone="Helpful"):
-     assistant = generate_response(session_id, user_text, enhancer_enabled, enhancer_tone)
-     _append_chat_display(session_id, user_text, assistant)
-     return assistant, _chat_display_to_messages(CHAT_DISPLAY[session_id])
-
- def handle_text_pdf(question, session_id):
-     return handle_pdf_question(question, session_id)
-
- def handle_text_image(question, session_id):
-     return handle_image_question(question, session_id)
-
- # ------------------ Gradio UI ------------------
with gr.Blocks() as demo:
    gr.HTML("""
    <style>
-     /* Change height + width of the audio recorder box */
-     #mic_box audio {
-         height: 50px !important; /* adjust height */
-         width: 200px !important; /* adjust width (optional) */
-     }
    </style>
    """)
-     gr.Markdown("## 🛠 Multi-Mode AI Assistant (Voice, PDF, Image)")
-
-     session_voice = gr.State(str(uuid.uuid4()))
-     session_pdf = gr.State(str(uuid.uuid4()))
-     session_image = gr.State(str(uuid.uuid4()))
-     # FIX: define pdf_summary_file BEFORE it is used
-     #pdf_summary_file = gr.File(label="Download Summary", visible=False)
-
-     with gr.Tab("🎤 Voice Chat"):
-         chat_voice = gr.Chatbot(t, height=320)
-         with gr.Row():
-             mic = gr.Audio(type="filepath",label="🎤 Record Voice (hold & speak)", elem_id="mic_box")
-             audio_output = gr.Audio(label="Assistant Voice Output", type="filepath", interactive=False)
-             tts_lang = gr.Dropdown(choices=["en", "ur"], value="en", label="TTS Language")
-
-         with gr.Row():
-             btn_general = gr.Button("⚡Ask General 🎯")
-             btn_pdf = gr.Button("⚡Ask PDF 📄")
-             btn_image = gr.Button("⚡Ask Image 🖼")
-             enhancer_toggle = gr.Checkbox(label="Enable Response Enhancer", value=False, scale =1)
-             tone_dropdown = gr.Dropdown(choices=["Helpful", "Formal", "Friendly"], value="Helpful", label="Enhancer Tone", scale =1)
-         with gr.Row():
-             btn_reset_logs = gr.Button(" Reset LOGs")
-             btn_download_logs = gr.Button("📥 Download Summary")
-             Voice_summary_file = gr.File(label="📥Download Summary File", interactive=False,scale =1)
-         #btn_general = gr.Button("⚡Ask General 🎯")
-         #btn_pdf = gr.Button("⚡Ask PDF 📄")
-         #btn_image = gr.Button("⚡Ask Image 🖼")
-         #with gr.Row():
-         #text_input = gr.Textbox(label="Or type a question (General)",visible=False)
-         #btn_send_text = gr.Button("Send (Text General)",visible=False)
-         #btn_reset_logs = gr.Button("♻ Reset LOGs")
-         answer_voice = gr.Textbox(label="Assistant Answer (text)", lines=2, visible=False)
-
-         btn_general.click(fn=handle_voice_general,
-                           inputs=[mic, session_voice, tts_lang, enhancer_toggle, tone_dropdown],
-                           outputs=[answer_voice, audio_output, chat_voice])
-         btn_pdf.click(fn=handle_voice_pdf, inputs=[mic, session_pdf, tts_lang], outputs=[answer_voice, audio_output, chat_voice])
-         btn_image.click(fn=handle_voice_image, inputs=[mic, session_image, tts_lang], outputs=[answer_voice, audio_output, chat_voice])
-         # btn_send_text.click(fn=handle_text_general, inputs=[text_input, session_voice, enhancer_toggle, tone_dropdown], outputs=[answer_voice, chat_voice])
-         btn_reset_logs.click(lambda: (str(uuid.uuid4()), [], None, None, ""), outputs=[session_voice, chat_voice, mic, audio_output, answer_voice])
-         btn_download_logs.click(download_pdf_summary, inputs=[session_voice], outputs=[Voice_summary_file])
-
-     with gr.Tab("📄 PDF Summarizer"):
-         pdf_output = gr.Textbox(label="Answer (Text Only)", lines=5)
-         with gr.Row():
-             pdf_upload_btn = gr.File(label="Upload PDF", file_types=[".pdf"], scale=1 )
-             pdf_question = gr.Textbox(label="Ask a question about PDF (text)", lines=3)
-             pdf_upload_msg = gr.Textbox(label="Upload Status", interactive=False)
-
-         with gr.Row():
-             pdf_send_btn = gr.Button("Ask (Questions)")
-             pdf_reset_btn = gr.Button(" Reset LOGs")
-         with gr.Row():
-             pdf_summary_file = gr.File(label="📥Download Summary File", interactive=False,scale =1)
-             pdf_download_btn = gr.Button("📥 Download Summary")
-
-         pdf_upload_btn.upload(handle_pdf_upload, inputs=[pdf_upload_btn, session_pdf], outputs=[pdf_upload_msg])
-         pdf_send_btn.click(handle_text_pdf, inputs=[pdf_question, session_pdf], outputs=[pdf_output])
-         pdf_reset_btn.click(lambda: (str(uuid.uuid4()), ""), outputs=[session_pdf, pdf_output])
-         pdf_download_btn.click(download_pdf_summary, inputs=[session_pdf], outputs=[pdf_summary_file])
-
-     with gr.Tab("🖼 Image OCR"):
-         image_output = gr.Textbox(label="Answer (Text Only)", lines=5)
-         with gr.Row():
-             image_upload_btn = gr.File(label="Upload Image", file_types=[".png", ".jpg", ".jpeg"], scale =1 )
-             image_question = gr.Textbox(label="Ask question about Image", lines=3)
-             image_upload_msg = gr.Textbox(label="Upload Status", interactive=False)
-
-         with gr.Row():
-             image_send_btn = gr.Button("Ask (Questions)")
-             image_reset_btn = gr.Button("♻ Reset LOGs")
-         with gr.Row():
-             image_summary_file = gr.File(label="📥Download Summary File", interactive=False,scale =1)
-             image_download_btn = gr.Button("📥 Download Summary")
-
-         image_upload_btn.upload(handle_image_upload, inputs=[image_upload_btn, session_image], outputs=[image_upload_msg, image_output])
-         image_send_btn.click(handle_text_image, inputs=[image_question, session_image], outputs=[image_output])
-         image_reset_btn.click(lambda: (str(uuid.uuid4()), ""), outputs=[session_image, image_output])
-         image_download_btn.click(download_pdf_summary, inputs=[session_image], outputs=[image_summary_file])
-
- if __name__ == "__main__":
-     demo.launch()

# app.py
"""
+ Multi-Mode AI Assistant (Voice, PDF, Image) with full colorful website-like UI
+ - All functionality remains intact
+ - Custom tabs, bright buttons, dark background, visible text
"""
import os
import uuid

from fpdf import FPDF
from datetime import datetime

+ # ------------------ API Keys ------------------
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "").strip()
OCR_SPACE_API_KEY = os.getenv("OCR_SPACE_API_KEY", "").strip()
+ if not GROQ_API_KEY: raise ValueError("❌ GROQ_API_KEY missing.")
+ if not OCR_SPACE_API_KEY: raise ValueError("❌ OCR_SPACE_API_KEY missing.")
HEADERS = {"Authorization": f"Bearer {GROQ_API_KEY}"}

# ------------------ Global State ------------------
IMAGE_TEXT = {}
IMAGE_EMBEDS = {}
CHUNK_SIZE = 1500
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

# ------------------ Helpers ------------------
def _get_path_from_gr_file(gr_file):
+     if not gr_file: return None
+     if isinstance(gr_file,str) and os.path.exists(gr_file): return gr_file
    try:
+         if hasattr(gr_file,"name") and os.path.exists(gr_file.name): return gr_file.name
+     except: pass
+     if isinstance(gr_file,dict):
+         for key in ("name","file_name","filepath"):
            if key in gr_file:
+                 candidate=gr_file.get(key)
+                 if isinstance(candidate,str) and os.path.exists(candidate): return candidate
    return None
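
One small regression in this rewrite: the compacted `except: pass` is a bare except, which also swallows `KeyboardInterrupt` and `SystemExit`; the previous version's `except Exception:` avoided that. A minimal sketch of the same guard without the side effect:

    try:
        if hasattr(gr_file, "name") and os.path.exists(gr_file.name):
            return gr_file.name
    except Exception:
        pass  # non-fatal: fall through to the dict-based lookups below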

+ def chunk_text(text,size=CHUNK_SIZE):
+     return [text[i:i+size] for i in range(0,len(text),size)]
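
`chunk_text` still slices at fixed 1500-character offsets, which can cut a sentence mid-word at every chunk boundary and weaken retrieval. A hedged variant (the `overlap` parameter and name are hypothetical, not part of this commit) keeps consecutive chunks sharing a tail:

    def chunk_text_overlap(text, size=CHUNK_SIZE, overlap=200):
        # step < size, so each chunk repeats the last `overlap` characters of the previous one
        step = max(1, size - overlap)
        return [text[i:i + size] for i in range(0, len(text), step)]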

+ def synthesize_speech(text,lang="en"):
+     if not text: return None
+     tmp=tempfile.NamedTemporaryFile(delete=False,suffix=".mp3")
+     gTTS(text=text,lang=lang).save(tmp.name)
+     return tmp.name
+
+ def select_relevant_chunk(question,chunks,chunk_embeds):
+     if not chunks or chunk_embeds is None: return ""
+     q_emb=embed_model.encode(question,convert_to_tensor=True)
+     scores=util.cos_sim(q_emb,chunk_embeds)[0]
+     top_idx=int(scores.argmax().item())
    return chunks[top_idx]
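
For context: `util.cos_sim(q_emb, chunk_embeds)` returns a (1, num_chunks) similarity matrix, so `[0]` leaves one score per chunk and `argmax` picks the single best chunk. If one chunk proves too little context, a top-k variant is a natural extension (a sketch only; `k` and the joining strategy are assumptions, not in this commit):

    def select_top_k_chunks(question, chunks, chunk_embeds, k=3):
        q_emb = embed_model.encode(question, convert_to_tensor=True)
        scores = util.cos_sim(q_emb, chunk_embeds)[0]
        # take the k highest-scoring chunks and concatenate them for the prompt
        top = scores.topk(min(k, len(chunks))).indices.tolist()
        return "\n\n".join(chunks[i] for i in top)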

def _chat_display_to_messages(chat_display):
+     msgs=[]
+     for user,assistant in chat_display:
+         msgs.append({"role":"user","content":user})
+         msgs.append({"role":"assistant","content":assistant})
    return msgs

+ # ------------------ LLM & Transcription ------------------
def transcribe_audio(audio_path):
    if not audio_path or not os.path.exists(audio_path):
        return "Error: audio file missing."
    try:
+         url="https://api.groq.com/openai/v1/audio/transcriptions"
+         with open(audio_path,"rb") as f:
+             files={"file":(os.path.basename(audio_path),f,"audio/wav")}
+             data={"model":"whisper-large-v3"}
+             resp=requests.post(url,headers=HEADERS,files=files,data=data,timeout=60)
        resp.raise_for_status()
+         return resp.json().get("text","") or ""
    except Exception as e:
+         print("Transcription error:",e)
        return f"Error transcribing audio: {e}"

def groq_chat_completion(messages):
    try:
+         body={"model":"llama-3.1-8b-instant","messages":messages}
+         resp=requests.post("https://api.groq.com/openai/v1/chat/completions",headers=HEADERS,json=body,timeout=60)
        resp.raise_for_status()
        return resp.json()["choices"][0]["message"]["content"]
    except Exception as e:
+         print("Chat error:",e)
        return f"Error generating response: {e}"

+ def generate_response(session_id,user_text,enhancer_enabled=False,enhancer_tone="Helpful"):
+     if session_id not in SESSION_HISTORY: SESSION_HISTORY[session_id]=[]
+     SESSION_HISTORY[session_id].append({"role":"user","content":user_text})
+     messages=[{"role":"system","content":"You are a helpful AI assistant."}]+SESSION_HISTORY[session_id]
    if enhancer_enabled:
+         messages.append({"role":"user","content":f"Enhance response. Tone: {enhancer_tone}. Question: {user_text}"})
+     assistant_text=groq_chat_completion(messages)
+     SESSION_HISTORY[session_id].append({"role":"assistant","content":assistant_text})
    return assistant_text
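
Note that with the enhancer on, the question reaches the model twice: once from SESSION_HISTORY and again inside the enhancer instruction. One possible cleanup (a sketch, not the committed behavior) folds the tone into the system prompt instead:

    def generate_response(session_id, user_text, enhancer_enabled=False, enhancer_tone="Helpful"):
        history = SESSION_HISTORY.setdefault(session_id, [])
        history.append({"role": "user", "content": user_text})
        system = "You are a helpful AI assistant."
        if enhancer_enabled:
            # steer tone via the system prompt rather than repeating the question
            system += f" Respond in a {enhancer_tone.lower()} tone."
        assistant_text = groq_chat_completion([{"role": "system", "content": system}] + history)
        history.append({"role": "assistant", "content": assistant_text})
        return assistant_text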

+ # ------------------ PDF ------------------
+ def handle_pdf_upload(pdf_file,session_id):
+     path=_get_path_from_gr_file(pdf_file)
+     if not path: return "No file uploaded."
    try:
+         reader=PdfReader(path)
+         text="".join([page.extract_text() or "" for page in reader.pages])
+         if not text.strip(): return "No extractable content."
+         chunks=chunk_text(text)
+         PDF_CONTENT[session_id]=chunks
+         PDF_EMBEDS[session_id]=embed_model.encode(chunks,convert_to_tensor=True)
+         return f"PDF processed: {len(chunks)} chunks."
+     except Exception as e: return f"PDF error: {e}"
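
Small regression here: the old loop appended "\n" after every page, while the new `"".join(...)` concatenates pages with no separator, so the last word of one page can fuse with the first word of the next and end up garbled inside a retrieval chunk. Restoring the separator is a one-line change:

    text = "\n".join(page.extract_text() or "" for page in reader.pages)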
+
+ def handle_pdf_question(question,session_id):
+     if session_id not in PDF_CONTENT: return "Upload PDF first."
+     chunk=select_relevant_chunk(question,PDF_CONTENT[session_id],PDF_EMBEDS[session_id])
+     messages=[{"role":"system","content":"Summarize PDF."},{"role":"user","content":f"PDF chunk:\n{chunk}\n\nQuestion:{question}"}]
+     assistant_text=groq_chat_completion(messages)
+     assistant_text=f"**Snippet from PDF:**\n{chunk[:200]}...\n\n**Answer:**\n{assistant_text}"
+     if session_id not in SESSION_HISTORY: SESSION_HISTORY[session_id]=[]
+     SESSION_HISTORY[session_id].append({"role":"assistant","content":assistant_text})
    return assistant_text

# ------------------ Image OCR ------------------
+ def ocr_space_file(image_path,api_key,language="eng"):
+     if not image_path or not os.path.exists(image_path): return ""
    try:
+         with open(image_path,"rb") as f:
+             payload={"apikey":api_key,"language":language}
+             files={"file":f}
+             r=requests.post("https://api.ocr.space/parse/image",files=files,data=payload,timeout=60)
        r.raise_for_status()
+         j=r.json()
+         if j.get("IsErroredOnProcessing"): return ""
+         parsed=[pr.get("ParsedText","") for pr in j.get("ParsedResults",[])]
        return "\n".join(parsed)
+     except Exception as e: print("OCR error:",e); return ""
+
+ def handle_image_upload(image_file,session_id):
+     path=_get_path_from_gr_file(image_file)
+     if not path: return "No image uploaded.",""
+     parsed=ocr_space_file(path,OCR_SPACE_API_KEY)
+     if not parsed.strip(): return "No text found.",""
+     chunks=chunk_text(parsed)
+     IMAGE_TEXT[session_id]=chunks
+     IMAGE_EMBEDS[session_id]=embed_model.encode(chunks,convert_to_tensor=True)
+     return f"Image processed: {len(chunks)} chunks.",""
+
+ def handle_image_question(question,session_id):
+     if session_id not in IMAGE_TEXT: return "Upload Image first."
+     chunk=select_relevant_chunk(question,IMAGE_TEXT[session_id],IMAGE_EMBEDS[session_id])
+     messages=[{"role":"system","content":"Summarize Image text"},{"role":"user","content":f"Image chunk:\n{chunk}\n\nQuestion:{question}"}]
+     assistant_text=groq_chat_completion(messages)
+     assistant_text=f"**Snippet from Image:**\n{chunk[:200]}...\n\n**Answer:**\n{assistant_text}"
+     if session_id not in SESSION_HISTORY: SESSION_HISTORY[session_id]=[]
+     SESSION_HISTORY[session_id].append({"role":"assistant","content":assistant_text})
    return assistant_text

+ # ------------------ PDF Download ------------------
+ def generate_pdf_file(text,filename_prefix="summary"):
+     pdf=FPDF()
    pdf.add_page()
+     pdf.set_auto_page_break(True,margin=15)
+     pdf.set_font("Arial","B",14)
+     pdf.multi_cell(0,8,f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n")
+     pdf.set_font("Arial","",12)
+     for line in text.split("\n"): pdf.multi_cell(0,6,line)
+     path=f"/tmp/{filename_prefix}_{uuid.uuid4()}.pdf"
+     pdf.output(path)
+     return path
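
A caveat on this download path: with FPDF's built-in Arial font, `pdf.output()` generally raises `UnicodeEncodeError` on characters outside latin-1, and the stored assistant answers can contain emoji or other non-latin text. A defensive sketch, assuming the classic core-font limitation (this helper is not part of the commit):

    def safe_pdf_text(text):
        # built-in fonts only cover latin-1; replace everything else with '?'
        return text.encode("latin-1", "replace").decode("latin-1")

Passing each line through `safe_pdf_text` before `pdf.multi_cell(0,6,line)` keeps the summary download from failing on such content.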
 
def download_pdf_summary(session_id):
+     summary="\n".join([m["content"] for m in SESSION_HISTORY.get(session_id,[]) if m["role"]=="assistant"])
+     if not summary: summary="No summary available."
+     return generate_pdf_file(summary,"summary")
+
+ # ------------------ Voice ------------------
+ def _append_chat_display(session_id,user_text,assistant_text):
+     if session_id not in CHAT_DISPLAY: CHAT_DISPLAY[session_id]=[]
+     CHAT_DISPLAY[session_id].append((user_text,assistant_text))
+
+ def handle_voice_general(audio_file,session_id,tts_lang="en",enhancer_enabled=False,enhancer_tone="Helpful"):
+     path=_get_path_from_gr_file(audio_file)
+     if not path: return "No audio",None,[]
+     user_text=transcribe_audio(path)
+     assistant_text=generate_response(session_id,user_text,enhancer_enabled,enhancer_tone)
+     _append_chat_display(session_id,user_text,assistant_text)
+     audio_path=synthesize_speech(assistant_text,tts_lang)
+     return assistant_text,audio_path,_chat_display_to_messages(CHAT_DISPLAY[session_id])
+
+ def handle_voice_pdf(audio_file,session_id,tts_lang="en"):
+     path=_get_path_from_gr_file(audio_file)
+     if not path: return "No audio",None,[]
+     user_text=transcribe_audio(path)
+     assistant_text=handle_pdf_question(user_text,session_id)
+     _append_chat_display(session_id,user_text,assistant_text)
+     audio_path=synthesize_speech(assistant_text,tts_lang)
+     return assistant_text,audio_path,_chat_display_to_messages(CHAT_DISPLAY[session_id])
+
+ def handle_voice_image(audio_file,session_id,tts_lang="en"):
+     path=_get_path_from_gr_file(audio_file)
+     if not path: return "No audio",None,[]
+     user_text=transcribe_audio(path)
+     assistant_text=handle_image_question(user_text,session_id)
+     _append_chat_display(session_id,user_text,assistant_text)
+     audio_path=synthesize_speech(assistant_text,tts_lang)
+     return assistant_text,audio_path,_chat_display_to_messages(CHAT_DISPLAY[session_id])
+
+ def handle_text_general(user_text,session_id,enhancer_enabled=False,enhancer_tone="Helpful"):
+     assistant=generate_response(session_id,user_text,enhancer_enabled,enhancer_tone)
+     _append_chat_display(session_id,user_text,assistant)
+     return assistant,_chat_display_to_messages(CHAT_DISPLAY[session_id])
+
+ def handle_text_pdf(question,session_id): return handle_pdf_question(question,session_id)
+ def handle_text_image(question,session_id): return handle_image_question(question,session_id)
+
+ # ------------------ Gradio Custom Tabs UI ------------------
with gr.Blocks() as demo:
    gr.HTML("""
    <style>
+     body{background:#0f172a;color:#f8fafc;font-family:sans-serif;}
+     h2{color:#facc15;}
+     .tab-btn{background:linear-gradient(90deg,#f472b6,#3b82f6);color:#fff;padding:10px;border-radius:12px;margin-right:5px;font-weight:bold;}
+     .tab-btn:hover{background:linear-gradient(90deg,#3b82f6,#f472b6);}
+     .hidden{display:none;}
+     .visible{display:block;}
+     .chat-msg.user{background:#6b7280;color:#fff;padding:5px;border-radius:10px;margin:3px 0;}
+     .chat-msg.assistant{background:#f59e0b;color:#111;padding:5px;border-radius:10px;margin:3px 0;}
+     .gr-button{background:linear-gradient(90deg,#f472b6,#3b82f6);color:#fff;font-weight:bold;border-radius:12px;padding:10px;}
+     .gr-button:hover{background:linear-gradient(90deg,#3b82f6,#f472b6);}
+     .gr-textbox,.gr-file,.gr-dropdown,.gr-checkbox{background:#1e293b;color:#f8fafc;border-radius:8px;border:1px solid #facc15;}
    </style>
    """)
+     gr.Markdown("## 🌟 Multi-Mode AI Assistant (Voice, PDF, Image)")
+
+     # Session States
+     session_voice=gr.State(str(uuid.uuid4()))
+     session_pdf=gr.State(str(uuid.uuid4()))
+     session_image=gr.State(str(uuid.uuid4()))
+
+     # --- Tab Buttons ---
+     with gr.Row():
+         btn_voice_tab=gr.Button("🎤 Voice Chat",elem_id="btn_voice")
+         btn_pdf_tab=gr.Button("📄 PDF Summarizer",elem_id="btn_pdf")
+         btn_image_tab=gr.Button("🖼 Image OCR",elem_id="btn_image")
+
+     # --- Voice Chat Column ---
+     col_voice=gr.Column(visible=True)
+     chat_voice=gr.Chatbot(height=300,parent=col_voice)
+     with gr.Row(parent=col_voice):
+         mic=gr.Audio(type="filepath",label="🎤 Record Voice")
+         audio_output=gr.Audio(type="filepath",label="Assistant Voice",interactive=False)
+         tts_lang=gr.Dropdown(choices=["en","ur"],value="en",label="TTS Language")
+     with gr.Row(parent=col_voice):
+         btn_general=gr.Button("⚡Ask General")
+         btn_pdf=gr.Button("⚡Ask PDF")
+         btn_image=gr.Button("⚡Ask Image")
+         enhancer_toggle=gr.Checkbox(label="Enable Enhancer",value=False)
+         tone_dropdown=gr.Dropdown(choices=["Helpful","Formal","Friendly"],value="Helpful",label="Tone")
+     with gr.Row(parent=col_voice):
+         btn_reset=gr.Button(" Reset Logs")
+         btn_download=gr.Button("📥 Download Summary")
+         summary_file=gr.File(interactive=False)
+     answer_voice=gr.Textbox(visible=False,parent=col_voice)
+
+     # --- PDF Column ---
+     col_pdf=gr.Column(visible=False)
+     pdf_output=gr.Textbox(lines=5,parent=col_pdf)
+     pdf_upload=gr.File(file_types=[".pdf"],parent=col_pdf)
+     pdf_question=gr.Textbox(lines=3,label="Ask PDF Question",parent=col_pdf)
+     pdf_upload_msg=gr.Textbox(interactive=False,parent=col_pdf)
+     pdf_send=gr.Button("Ask",parent=col_pdf)
+     pdf_reset=gr.Button("♻ Reset Logs",parent=col_pdf)
+     pdf_summary_file=gr.File(interactive=False,parent=col_pdf)
+     pdf_download=gr.Button("📥 Download Summary",parent=col_pdf)
+
+     # --- Image Column ---
+     col_image=gr.Column(visible=False)
+     image_output=gr.Textbox(lines=5,parent=col_image)
+     image_upload=gr.File(file_types=[".png",".jpg",".jpeg"],parent=col_image)
+     image_question=gr.Textbox(lines=3,label="Ask Image Question",parent=col_image)
+     image_upload_msg=gr.Textbox(interactive=False,parent=col_image)
+     image_send=gr.Button("Ask",parent=col_image)
+     image_reset=gr.Button("♻ Reset Logs",parent=col_image)
+     image_summary_file=gr.File(interactive=False,parent=col_image)
+     image_download=gr.Button("📥 Download Summary",parent=col_image)
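
A note on the `parent=` pattern used throughout these columns: Gradio's Blocks API places components by nesting them inside layout context managers, and `parent` is not a documented constructor argument as far as I can tell, so these components may land outside their columns or the keyword may be rejected outright. The conventional layout, sketched for the image column only:

    with gr.Column(visible=False) as col_image:
        image_output = gr.Textbox(lines=5)
        image_upload = gr.File(file_types=[".png", ".jpg", ".jpeg"])
        image_question = gr.Textbox(lines=3, label="Ask Image Question")
        # ...remaining image components declared inside the same with-block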
+
+     # --- Tab Switching Logic ---
+     def switch_tab(tab_name):
+         return (
+             tab_name=="voice",
+             tab_name=="pdf",
+             tab_name=="image"
+         )
+     btn_voice_tab.click(lambda:switch_tab("voice"),outputs=[col_voice,col_pdf,col_image])
+     btn_pdf_tab.click(lambda:switch_tab("pdf"),outputs=[col_voice,col_pdf,col_image])
+     btn_image_tab.click(lambda:switch_tab("image"),outputs=[col_voice,col_pdf,col_image])
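
Returning raw booleans to Column outputs is unlikely to toggle visibility: Gradio interprets returned values as new component values, and visibility changes normally go through `gr.update(...)`. A sketch of the usual pattern (assuming a Gradio version where `gr.update` is available):

    def switch_tab(tab_name):
        # one update per output component, changing only visibility
        return (
            gr.update(visible=tab_name == "voice"),
            gr.update(visible=tab_name == "pdf"),
            gr.update(visible=tab_name == "image"),
        )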
+
+     # --- Voice Click Handlers ---
+     btn_general.click(fn=handle_voice_general,inputs=[mic,session_voice,tts_lang,enhancer_toggle,tone_dropdown],outputs=[answer_voice,audio_output,chat_voice])
+     btn_pdf.click(fn=handle_voice_pdf,inputs=[mic,session_pdf,tts_lang],outputs=[answer_voice,audio_output,chat_voice])
+     btn_image.click(fn=handle_voice_image,inputs=[mic,session_image,tts_lang],outputs=[answer_voice,audio_output,chat_voice])
+     btn_reset.click(lambda:(str(uuid.uuid4()),[],None,None,""),outputs=[session_voice,chat_voice,mic,audio_output,answer_voice])
+     btn_download.click(download_pdf_summary,inputs=[session_voice],outputs=[summary_file])
+
+     # --- PDF Handlers ---
+     pdf_upload.upload(handle_pdf_upload,inputs=[pdf_upload,session_pdf],outputs=[pdf_upload_msg])
+     pdf_send.click(handle_text_pdf,inputs=[pdf_question,session_pdf],outputs=[pdf_output])
+     pdf_reset.click(lambda:(str(uuid.uuid4()),""),outputs=[session_pdf,pdf_output])
+     pdf_download.click(download_pdf_summary,inputs=[session_pdf],outputs=[pdf_summary_file])
+
+     # --- Image Handlers ---
+     image_upload.upload(handle_image_upload,inputs=[image_upload,session_image],outputs=[image_upload_msg,image_output])
+     image_send.click(handle_text_image,inputs=[image_question,session_image],outputs=[image_output])
+     image_reset.click(lambda:(str(uuid.uuid4()),""),outputs=[session_image,image_output])
+     image_download.click(download_pdf_summary,inputs=[session_image],outputs=[image_summary_file])
+
+ demo.launch()
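
Finally, the `if __name__ == "__main__":` guard from the previous version was dropped, so `demo.launch()` now runs at import time. That is harmless on a Hugging Face Space, but restoring the guard keeps the module importable elsewhere:

    if __name__ == "__main__":
        demo.launch()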