Converse_AI

Running

App Files Files Community

Eyadddddddd committed on Jan 2

Commit

5b726d6

verified ·

1 Parent(s): 5ecf5f0

Update app.py

Browse files

Files changed (1) hide show

app.py +241 -86

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import os
 import base64
 import tempfile
 import gradio as gr
 from groq import Groq
@@ -8,24 +9,32 @@ from gradio_client import Client
 from pdf2image import convert_from_path
 from PIL import Image
-# =========================
-# Groq setup
-# =========================
 GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 if not GROQ_API_KEY:
     raise ValueError("GROQ_API_KEY environment variable is not set.")
-client = Groq(api_key=GROQ_API_KEY)
 TEXT_MODEL = "llama-3.1-8b-instant"
-# =========================
-# Qwen3-VL Demo Space
-# =========================
 qwen_client = Client("Qwen/Qwen3-VL-Demo")
-# =========================
-# Modes and prompts
-# =========================
 MODE_PROMPTS = {
     "Normal Chat": (
         "You are NeoHelper, Eyad’s branded assistant. "
@@ -45,28 +54,74 @@ MODE_PROMPTS = {
     ),
 }
-# =========================
-# Helpers
-# =========================
 def encode_image(path: str) -> str:
     with open(path, "rb") as f:
         return base64.b64encode(f.read()).decode("utf-8")
-def extract_images_from_pdf(pdf_path: str):
     try:
         pages = convert_from_path(pdf_path)
-        image_paths = []
-        for page in pages:
             tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
             page.save(tmp.name, "PNG")
             image_paths.append(tmp.name)
         return image_paths
     except Exception:
         return None
-def call_qwen3vl(image_path, prompt):
     try:
         result = qwen_client.predict(
             "/add_message",
@@ -75,111 +130,211 @@ def call_qwen3vl(image_path, prompt):
                 "files": [image_path],
             },
         )
         chatbot_state = result[1]
-        return chatbot_state[-1][1] if chatbot_state else "No response."
     except Exception as e:
         return f"⚠️ Vision model error: {str(e)}"
-def call_groq_text(message, system_prompt):
-    try:
-        resp = client.chat.completions.create(
-            model=TEXT_MODEL,
-            messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": message},
-            ],
-            max_tokens=700,
-        )
-        return resp.choices[0].message.content
-    except Exception as e:
-        return f"⚠️ Text model error: {str(e)}"
-# =========================
-# Main chat function
-# =========================
 def chat_fn(message, history, file, mode):
-    system_prompt = MODE_PROMPTS.get(mode, MODE_PROMPTS["Normal Chat"])
     if isinstance(file, list):
         file = file[0] if file else None
     if file is None:
         return call_groq_text(message, system_prompt)
     file_path = file.name
-    file_ext = file_path.lower()
-    # Image
-    if file_ext.endswith((".png", ".jpg", ".jpeg", ".webp")):
         try:
-            prompt = message or "Explain this image."
             return call_qwen3vl(file_path, prompt)
         except Exception as e:
             return f"⚠️ Error analyzing image: {str(e)}"
-    # PDF
     if file_ext.endswith(".pdf"):
         try:
             image_paths = extract_images_from_pdf(file_path)
             if not image_paths:
-                return "This PDF contains no images I can analyze."
-            all_analyses = []
-            for img_path in image_paths:
-                prompt = message or "Explain this PDF page."
-                result = call_qwen3vl(img_path, prompt)
-                all_analyses.append(result)
-            summary_prompt = (
-                "Summarize these PDF page analyses into one clear explanation "
-                "for a student:\n\n" + "\n\n---\n\n".join(all_analyses)
-            )
-            return call_groq_text(summary_prompt, system_prompt)
         except Exception as e:
             return f"⚠️ Error processing PDF: {str(e)}"
-    return "Unsupported file type. Please upload an image or PDF."
-# =========================
-# Gradio UI
-# =========================
-with gr.Blocks(title="NeoHelper") as demo:
-    gr.Markdown("## 🧠 NeoHelper — Text + Image + PDF", elem_id="title")
-    mode_dd = gr.Dropdown(
-        choices=list(MODE_PROMPTS.keys()),
-        value="Normal Chat",
-        label="Mode",
-    )
-    file_input = gr.File(
-        label="Upload image or PDF (optional)",
-        file_types=["image", ".pdf"],
-        every=True,
-    )
-    chat = gr.ChatInterface(
-        fn=chat_fn,
-        additional_inputs=[file_input, mode_dd],
-        chatbot=gr.Chatbot(show_label=False),
-        title="NeoHelper",
-        description=None,
-        examples=None,
-        api_name="neohelper_chat",
-    )
-demo.launch(
-    theme=gr.themes.Base().set(
-        body_background_fill="#1c1c1c",
-        body_text_color="#f0f0f0",
-        block_background_fill="#2a2a2a",
-        block_border_color="#444",
-        block_label_text_color="#f0f0f0",
         button_primary_background_fill="#4a90e2",
         button_primary_text_color="#ffffff",
     )
-)
-demo.launch()

 import os
 import base64
 import tempfile
+from typing import List, Optional, Tuple
 import gradio as gr
 from groq import Groq
 from pdf2image import convert_from_path
 from PIL import Image
+# ============================================================
+# CONFIG & GLOBALS
+# ============================================================
+# ----------------------------
+# 1. Groq setup (text model)
+# ----------------------------
 GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+# An unset or empty key aborts module import here, before any client
+# object is constructed.
 if not GROQ_API_KEY:
     raise ValueError("GROQ_API_KEY environment variable is not set.")
+# Module-level Groq client, created once with the key read above.
+groq_client = Groq(api_key=GROQ_API_KEY)
+# Model name passed as `model=` on Groq chat-completion requests.
 TEXT_MODEL = "llama-3.1-8b-instant"
+# ----------------------------
+# 2. Qwen3-VL Demo (vision)
+# ----------------------------
+# Uses the public Hugging Face Space:
+# https://huggingface.co/spaces/Qwen/Qwen3-VL-Demo
+# The client is also created at import time, alongside the Groq client.
 qwen_client = Client("Qwen/Qwen3-VL-Demo")
+# ----------------------------
+# 3. Modes and system prompts
+# ----------------------------
 MODE_PROMPTS = {
     "Normal Chat": (
         "You are NeoHelper, Eyad’s branded assistant. "
     ),
 }
+# ============================================================
+# HELPER FUNCTIONS
+# ============================================================
 def encode_image(path: str) -> str:
+    """
+    Return the contents of the file at ``path`` as base64 text.
+
+    The file is opened in binary mode, read in full, base64-encoded,
+    and the encoded bytes are decoded as UTF-8.
+
+    NOTE: The current Qwen3-VL demo integration sends file paths
+    directly, not base64. This helper is kept for possible future use
+    (e.g., switching back to a vision API that needs base64 input).
+    """
+    with open(path, "rb") as image_file:
+        raw_bytes = image_file.read()
+    encoded = base64.b64encode(raw_bytes)
+    return encoded.decode("utf-8")
+def extract_images_from_pdf(pdf_path: str) -> Optional[List[str]]:
+    """
+    Render every page of a PDF to its own temporary PNG file.
+
+    Args:
+        pdf_path: path of the PDF to convert.
+
+    Returns:
+        - list of temporary image file paths on success, one per page, in
+          the order convert_from_path() returned the pages. The files are
+          created with delete=False, so the caller is responsible for
+          removing them.
+        - None on failure (any PNGs already written are removed first).
+    """
+    image_paths: List[str] = []
     try:
         pages = convert_from_path(pdf_path)
+        for page in pages:
+            # Only a unique path is needed. Closing the handle before
+            # saving avoids leaking one open file per page and avoids
+            # writing to a path that is still held open.
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
+                png_path = tmp.name
+            image_paths.append(png_path)
+            page.save(png_path, "PNG")
         return image_paths
     except Exception:
+        # Deliberate best-effort contract: callers treat None as
+        # "could not convert". Remove partial output before reporting.
+        for leftover in image_paths:
+            try:
+                os.remove(leftover)
+            except OSError:
+                pass
         return None
+def call_groq_text(message: str, system_prompt: str) -> str:
+    """
+    Ask the Groq chat model named by TEXT_MODEL a text-only question.
+
+    Args:
+        message: text sent as the "user" turn.
+        system_prompt: text sent as the "system" turn.
+
+    Returns:
+        The content of the first completion choice, or a
+        "⚠️ Text model error: ..." string if the request raises.
+    """
+    conversation = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": message},
+    ]
+    try:
+        completion = groq_client.chat.completions.create(
+            model=TEXT_MODEL,
+            messages=conversation,
+            max_tokens=900,
+        )
+        first_choice = completion.choices[0]
+        return first_choice.message.content
+    except Exception as err:
+        return f"⚠️ Text model error: {err!s}"
+def call_qwen3vl(image_path: str, prompt: str) -> str:
+    """
+    Call the Qwen3-VL demo Space with an image + text prompt.
+    Args:
+        image_path: local path to an image file
+        prompt: text instruction/question
+    Returns:
+        String answer from the vision model.
+    """
     try:
         result = qwen_client.predict(
             "/add_message",
                 "files": [image_path],
             },
         )
+        # result = [multimodal_input_state, chatbot_state]
         chatbot_state = result[1]
+        if not chatbot_state:
+            return "No response from vision model."
+        # chatbot_state is like [[user_msg, assistant_msg], ...]
+        return chatbot_state[-1][1]
     except Exception as e:
         return f"⚠️ Vision model error: {str(e)}"
+def build_system_prompt(mode: str) -> str:
+    """
+    Look up the system prompt registered for ``mode`` in MODE_PROMPTS.
+
+    Modes that are not a key of MODE_PROMPTS fall back to the
+    "Normal Chat" prompt.
+    """
+    fallback_prompt = MODE_PROMPTS["Normal Chat"]
+    return MODE_PROMPTS.get(mode, fallback_prompt)
+def summarize_pdf_analyses(
+    analyses: List[str],
+    system_prompt: str,
+) -> str:
+    """
+    Condense per-page analyses into a single explanation.
+
+    The analyses are joined with a "--- PAGE BREAK ---" separator,
+    prefixed with summarization instructions, and sent to the Groq text
+    model through call_groq_text() together with ``system_prompt``.
+
+    Returns:
+        Whatever call_groq_text() returns for that request.
+    """
+    page_separator = "\n\n--- PAGE BREAK ---\n\n"
+    instructions = (
+        "Summarize these PDF page analyses into one clear explanation for a student. "
+        "Be structured, simple, and concise:\n\n"
+    )
+    request_text = instructions + page_separator.join(analyses)
+    return call_groq_text(request_text, system_prompt)
+# ============================================================
+# CORE BACKEND: chat_fn
+# ============================================================
 def chat_fn(message, history, file, mode):
+    """
+    Main NeoHelper backend function.
+
+    Signature is compatible with gr.ChatInterface:
+        fn(message: str, history: list, *additional_inputs)
+
+    Args:
+        message: user text; None is treated as "" and whitespace is stripped.
+        history: chat history supplied by the interface (not used here).
+        file: optional upload. May be None, a path string, an object
+            exposing ``.name`` (and optionally ``.orig_name``), or a list
+            of those, in which case only the first entry is used.
+        mode: key into MODE_PROMPTS selecting the system prompt.
+
+    Behavior:
+      - If no file:
+          -> Use Groq text model with the selected mode.
+      - If image:
+          -> Use Qwen3-VL demo (vision).
+      - If PDF:
+          -> Convert to images, analyze each page with Qwen3-VL,
+             summarize with Groq, then delete the temporary page images.
+
+    Returns:
+        The reply text, or a user-facing message/warning string.
+    """
+    system_prompt = build_system_prompt(mode)
+    # Normalize message
+    if message is None:
+        message = ""
+    message = message.strip()
+    # Normalize file input (Gradio sometimes passes [file])
     if isinstance(file, list):
         file = file[0] if file else None
+    # -----------------------------------------
+    # TEXT-ONLY MODE (Groq)
+    # -----------------------------------------
     if file is None:
+        if not message:
+            return "Please type a question or upload a file."
         return call_groq_text(message, system_prompt)
+    # -----------------------------------------
+    # FILE MODE (image or PDF)
+    # -----------------------------------------
+    # The upload may arrive as a plain path string rather than an object
+    # with a .name attribute; accept both instead of failing on str.
+    file_path = file if isinstance(file, str) else file.name
+    # Some platforms provide orig_name (user-facing name)
+    orig_name = getattr(file, "orig_name", file_path)
+    file_ext = orig_name.lower()
+    # -----------------------------------------
+    # IMAGE MODE (Qwen3-VL)
+    # -----------------------------------------
+    if file_ext.endswith((".png", ".jpg", ".jpeg", ".webp", ".bmp")):
         try:
+            prompt = message or "Explain this image in a clear way."
             return call_qwen3vl(file_path, prompt)
         except Exception as e:
             return f"⚠️ Error analyzing image: {str(e)}"
+    # -----------------------------------------
+    # PDF MODE (Qwen3-VL + Groq summary)
+    # -----------------------------------------
     if file_ext.endswith(".pdf"):
+        image_paths = None
         try:
             image_paths = extract_images_from_pdf(file_path)
             if not image_paths:
+                return "This PDF contains no pages I can analyze."
+            page_analyses: List[str] = []
+            for idx, img_path in enumerate(image_paths, start=1):
+                prompt = (
+                    message
+                    or f"Explain page {idx} of this PDF in a simple way for a student."
+                )
+                page_answer = call_qwen3vl(img_path, prompt)
+                page_analyses.append(f"Page {idx}:\n{page_answer}")
+            # Summarize across all pages with Groq
+            return summarize_pdf_analyses(page_analyses, system_prompt)
         except Exception as e:
             return f"⚠️ Error processing PDF: {str(e)}"
+        finally:
+            # The page images are temp files created with delete=False;
+            # remove them so repeated uploads do not fill the disk.
+            for img_path in image_paths or []:
+                try:
+                    os.remove(img_path)
+                except OSError:
+                    pass
+    # -----------------------------------------
+    # UNSUPPORTED FILE TYPE
+    # -----------------------------------------
+    return "Unsupported file type. Please upload an image or a PDF."
+# ============================================================
+# UI (Gradio ChatInterface, Dark Theme, NeoHelper Branding)
+# ============================================================
+def build_ui() -> gr.Blocks:
+    """
+    Build the full NeoHelper UI with:
+      - Dark theme (applied at launch)
+      - NeoHelper branding
+      - No logo
+      - No suggestions/examples
+      - Mode dropdown
+      - Optional file upload (image/PDF)
+      - ChatInterface layout
+
+    Returns:
+        The assembled gr.Blocks app. It is not launched here; the caller
+        is expected to call .launch() on it.
+    """
+    with gr.Blocks(title="NeoHelper") as demo:
+        # Header
+        gr.Markdown(
+            """
+            # 🧠 NeoHelper
+            **Multimodal assistant with:**
+            - Groq Llama 3.1 8B (text)
+            - Qwen3-VL Demo (images + PDFs)
+            """,
+            elem_id="title",
+        )
+        # Two columns: controls (scale=1) on one side, chat (scale=3) on the other.
+        with gr.Row():
+            with gr.Column(scale=1):
+                # One choice per MODE_PROMPTS key, preselecting "Normal Chat".
+                mode_dd = gr.Dropdown(
+                    choices=list(MODE_PROMPTS.keys()),
+                    value="Normal Chat",
+                    label="Mode",
+                )
+                # NOTE(review): `every=True` is passed through as-is; its effect
+                # on a File component with no callable value is unclear —
+                # confirm against the Gradio docs for the installed version.
+                file_input = gr.File(
+                    label="Upload image or PDF (optional)",
+                    file_types=["image", ".pdf"],
+                    every=True,
+                )
+            with gr.Column(scale=3):
+                # Core ChatInterface
+                # additional_inputs order matches chat_fn's trailing
+                # parameters: (message, history, file, mode).
+                gr.ChatInterface(
+                    fn=chat_fn,
+                    additional_inputs=[file_input, mode_dd],
+                    chatbot=gr.Chatbot(
+                        show_label=False,
+                    ),
+                    title="NeoHelper",
+                    description=None,   # no suggestions
+                    examples=None,      # no examples
+                    api_name="neohelper_chat",
+                )
+    return demo
+# ============================================================
+# MAIN
+# ============================================================
+if __name__ == "__main__":
+    # Assemble the interface first; the theme is only supplied at launch.
+    app = build_ui()
+    # Dark palette: overrides applied on top of the Base theme.
+    palette = {
+        "body_background_fill": "#121212",
+        "body_text_color": "#f5f5f5",
+        "block_background_fill": "#1e1e1e",
+        "block_border_color": "#333333",
+        "block_label_text_color": "#f5f5f5",
+        "button_primary_background_fill": "#4a90e2",
+        "button_primary_text_color": "#ffffff",
+        "button_secondary_background_fill": "#333333",
+        "button_secondary_text_color": "#f5f5f5",
+        "input_background_fill": "#1a1a1a",
+        "input_border_color": "#444444",
+    }
+    dark_theme = gr.themes.Base().set(**palette)
+    launch_options = {
+        "theme": dark_theme,
+        "show_api": False,  # no logo/API docs panel
+        "share": False,
+    }
+    app.launch(**launch_options)