Spaces:

shukdevdattaEX
/

NemoVision

Paused

App Files Files Community

shukdevdattaEX committed on Dec 26, 2025

Commit

a4b7006

verified ·

1 Parent(s): f4d47c7

Update app.py

Browse files

Files changed (1) hide show

app.py +478 -266

app.py CHANGED Viewed

@@ -1,303 +1,515 @@
 import gradio as gr
 from openai import OpenAI
 import base64
-import os
-from typing import List, Tuple, Any, Dict, Optional
-from PIL import Image
-import io
-# Custom CSS for premium, stunning design
-CUSTOM_CSS = """
-body {
-    background: linear-gradient(135deg, #0f0f23 0%, #1a1a2e 50%, #16213e 100%);
-    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
-    color: #e0e0e0;
-}
-.gradio-container {
-    max-width: 1400px !important;
-    margin: 0 auto;
-    background: rgba(0, 0, 0, 0.1);
-    border-radius: 20px;
-    box-shadow: 0 20px 40px rgba(0, 0, 0, 0.5);
-    overflow: hidden;
-}
-h1 {
-    background: linear-gradient(45deg, #00d4ff, #0099cc);
-    -webkit-background-clip: text;
-    -webkit-text-fill-color: transparent;
-    text-align: center;
-    margin: 0;
-    padding: 20px;
-    font-size: 2.5em;
-    text-shadow: 0 0 20px rgba(0, 212, 255, 0.5);
-}
-.gr-chatbot {
-    background: rgba(255, 255, 255, 0.05);
-    border-radius: 15px;
-    border: 1px solid rgba(0, 212, 255, 0.2);
-    backdrop-filter: blur(10px);
-}
-.gr-button {
-    background: linear-gradient(45deg, #00d4ff, #0099cc);
-    border: none;
-    border-radius: 10px;
-    color: white;
-    font-weight: bold;
-    transition: all 0.3s ease;
-    box-shadow: 0 5px 15px rgba(0, 212, 255, 0.3);
-}
-.gr-button:hover {
-    transform: translateY(-2px);
-    box-shadow: 0 8px 25px rgba(0, 212, 255, 0.4);
-}
-.gr-textbox, .gr-file {
-    background: rgba(255, 255, 255, 0.1);
-    border: 1px solid rgba(0, 212, 255, 0.3);
-    border-radius: 10px;
-    color: white;
-    backdrop-filter: blur(5px);
-}
-.gr-textbox::placeholder {
-    color: #a0a0a0;
-}
-.sidebar {
-    background: rgba(0, 0, 0, 0.2);
-    padding: 20px;
-    border-radius: 15px;
-    margin: 10px;
-    border: 1px solid rgba(0, 212, 255, 0.1);
-}
-"""
-# Function to encode image to base64
-def encode_image_to_base64(image_path: str) -> str:
     with open(image_path, "rb") as image_file:
-        return base64.b64encode(image_file.read()).decode("utf-8")
-# Function to build user content for multimodal input
-def build_user_content(message: str, files: List[str], video_url: str) -> List[Dict[str, Any]]:
-    content = [{"type": "text", "text": message}]
-    if files:
-        for file_path in files:
-            if file_path.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):
-                base64_image = encode_image_to_base64(file_path)
-                content.append({
-                    "type": "image_url",
-                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
-                })
-            # Note: For PDFs, we'd need extraction (e.g., via pdf2image), but skipped for simplicity
-            # Users can upload image screenshots of documents
-    if video_url and video_url.strip():
-        content.append({
-            "type": "video_url",
-            "video_url": {"url": video_url.strip()}
-        })
-    return content
-# Main response function
-def respond_to_query(
     message: str,
     history: List[Tuple[str, str]],
-    files: Optional[List[str]],
-    video_url: str,
-    api_key: str,
-    messages_state: List[Dict[str, Any]]
-) -> Tuple[List[Tuple[str, str]], str, Optional[List[str]], str, List[Dict[str, Any]], str]:
-    if not api_key or not api_key.strip():
-        return history, "", None, "", messages_state, "⚠️ Please enter your OpenRouter API key to start chatting."
-    if not message.strip():
-        return history, "", None, "", messages_state, "⚠️ Please enter a message."
-    client = OpenAI(
-        base_url="https://openrouter.ai/api/v1",
-        api_key=api_key.strip(),
-    )
-    # Copy current messages state
-    current_messages = messages_state.copy() if messages_state else []
-    # Add user input
-    user_content = build_user_content(message, files or [], video_url)
-    current_messages.append({"role": "user", "content": user_content})
-    try:
-        # API call with reasoning enabled
-        response = client.chat.completions.create(
-            model="nvidia/nemotron-nano-12b-v2-vl:free",
-            messages=current_messages,
-            extra_body={"reasoning": {"enabled": True}}
-        )
-        resp_message = response.choices[0].message
-        content = resp_message.content or "No response generated."
-        # Preserve reasoning details for multi-turn continuity
-        assistant_msg = {"role": "assistant", "content": content}
-        if hasattr(resp_message, 'reasoning_details') and resp_message.reasoning_details:
-            assistant_msg["reasoning_details"] = resp_message.reasoning_details
-        current_messages.append(assistant_msg)
-        # Append to history (text-only for display; attachments noted)
-        attachment_note = ""
-        if files:
-            attachment_note += f" + {len(files)} image(s)"
-        if video_url.strip():
-            attachment_note += f" + video URL"
-        display_message = message + (attachment_note if attachment_note else "")
-        display_response = content + ("\n\n*(Reasoning preserved for follow-up)*" if "reasoning_details" in assistant_msg else "")
-        history.append((display_message, display_response))
-        # Clear inputs
-        return history, "", None, "", current_messages, ""
-    except Exception as e:
-        error_msg = f"❌ Error: {str(e)}. Check your API key, file sizes (keep images <5MB), or video URL."
-        history.append((message, error_msg))
-        return history, "", None, "", current_messages, error_msg
-# Examples for creativity and to showcase capabilities
-EXAMPLES = [
-    [
-        "How many 'r's are in the word 'strawberry'? Think step by step.",
-        None,  # No files
-        ""    # No video
-    ],
-    [
-        "Describe this image in detail and reason about its contents.",
-        None,
-        ""
-    ],
-    [
-        "Analyze this chart: What trends do you see? Extract key data points.",
-        None,
-        ""
-    ],
-    [
-        "Read the text in this document image and summarize the main points.",
-        None,
-        ""
-    ],
-    [
-        "Count the objects in these multiple images and compare them.",
-        None,
-        ""
-    ],
-    [
-        "What happens in this video? Summarize the key events.",
-        None,
-        "https://example.com/sample-video.mp4"  # Placeholder; replace with real public URL
-    ]
-]
-# Main Gradio Blocks layout
-with gr.Blocks(theme=gr.themes.Ocean(), css=CUSTOM_CSS) as demo:
-    gr.HTML("""
-    <div style='text-align: center; padding: 10px;'>
-        <h1>🚀 Nemotron Nano 2 VL Premium Demo</h1>
-        <p style='color: #a0a0a0; font-size: 1.1em;'>Unleash multimodal magic: Text, Images, Documents & Videos | Powered by NVIDIA's Hybrid Transformer-Mamba</p>
-    </div>
-    """)
-    with gr.Row():
-        with gr.Column(scale=1):
-            # Sidebar for info and controls
-            with gr.Accordion("📖 Model Capabilities & Tips", open=False):
-                gr.Markdown("""
-                **Key Features:**
-                - **Text Reasoning:** Chain-of-thought with preserved reasoning.
-                - **Image/Document Intelligence:** OCR, chart analysis, multi-image docs (upload screenshots).
-                - **Video Understanding:** Enter public video URL (supports long-form with EVS).
-                - **Pro Tip:** For documents, upload multiple page images. Keep files small for fast inference.
-                - **License:** NVIDIA Open | Free tier via OpenRouter.
-                """)
-            api_key_input = gr.Textbox(
-                label="🔑 OpenRouter API Key",
-                placeholder="Enter your API key here (keep secure!)",
-                type="password",
-                lines=1
-            )
-        with gr.Column(scale=4):
-            # Chat interface
-            chatbot = gr.Chatbot(
-                height=600,
-                show_label=False,
-                avatar_images=("user_avatar.png", None),  # Optional: add custom avatars
-                bubble_full_width=False
-            )
-    with gr.Row():
-        msg_input = gr.Textbox(
-            label="💭 Your Message",
-            placeholder="Ask anything: 'Count the apples' or 'Summarize this video'...",
-            lines=2,
-            scale=3
-        )
-        file_upload = gr.File(
-            label="🖼️ Attachments (Images for OCR/Charts/Docs)",
-            file_types=["image"],
-            file_count="multiple",
-            scale=1
-        )
-        video_input = gr.Textbox(
-            label="🎥 Video URL (Optional)",
-            placeholder="e.g., https://example.com/video.mp4",
-            lines=1
-        )
-    with gr.Row():
-        submit_btn = gr.Button("✨ Send & Reason", variant="primary", scale=3)
-        clear_btn = gr.Button("🗑️ Clear Chat", scale=1)
-    # State for multi-turn messages
-    messages_state = gr.State([])
-    # Event handlers
     submit_btn.click(
-        fn=respond_to_query,
-        inputs=[msg_input, chatbot, file_upload, video_input, api_key_input, messages_state],
-        outputs=[chatbot, msg_input, file_upload, video_input, messages_state, msg_input]
     ).then(
-        fn=lambda: gr.Info("Message sent! Reasoning active."),
-        outputs=[]
     )
-    clear_btn.click(
-        fn=lambda: ([], "", None, "", [], ""),
-        outputs=[chatbot, msg_input, file_upload, video_input, messages_state, msg_input]
     ).then(
-        fn=lambda: gr.Info("Chat cleared."),
-        outputs=[]
     )
-    # Examples
-    gr.Examples(
-        examples=EXAMPLES,
-        inputs=[msg_input, file_upload, video_input],
-        label="💡 Quick Starts",
-        examples_per_page=6,
-        run_on_click=True,
-        fn=respond_to_query,
-        outputs=[chatbot, msg_input, file_upload, video_input, messages_state, msg_input],
-        cache_examples=False  # Since files are dynamic
-    ).style(container=False)
-    # Footer
-    gr.Markdown("""
-    <div style='text-align: center; padding: 20px; color: #a0a0a0;'>
-        Built with ❤️ for creative multimodal exploration | © 2025 Inspired by NVIDIA Nemotron
-    </div>
-    """)
 if __name__ == "__main__":
-    demo.launch(
-        share=True,  # Enable public link for demo
-        # server_name="0.0.0.0",
-        # server_port=7860,
-        # show_error=True,
-        # quiet=False
     )

 import gradio as gr
 from openai import OpenAI
 import base64
+from pathlib import Path
+import json
+from typing import List, Tuple, Optional
+import time
+# Global client variable
+client = None
+def initialize_client(api_key: str) -> Tuple[str, bool]:
+    """Initialize OpenAI client with OpenRouter"""
+    global client
+    if not api_key or not api_key.strip():
+        return "⚠️ Please enter a valid API key", False
+    try:
+        client = OpenAI(
+            base_url="https://openrouter.ai/api/v1",
+            api_key=api_key.strip()
+        )
+        return "✅ API Key configured successfully! You can now start chatting.", True
+    except Exception as e:
+        return f"❌ Error initializing client: {str(e)}", False
+def encode_image(image_path: str) -> str:
+    """Encode image to base64"""
     with open(image_path, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode('utf-8')
+def create_image_content(image_path: str, mime_type: str = "image/jpeg") -> dict:
+    """Create image content for API"""
+    base64_image = encode_image(image_path)
+    return {
+        "type": "image_url",
+        "image_url": {
+            "url": f"data:{mime_type};base64,{base64_image}"
+        }
+    }
+def process_message(
     message: str,
     history: List[Tuple[str, str]],
+    images: Optional[List] = None,
+    enable_reasoning: bool = True,
+    temperature: float = 0.7,
+    max_tokens: int = 2000
+) -> Tuple[List[Tuple[str, str]], str]:
+    """Process user message and generate response"""
+    global client
+    if client is None:
+        return history + [(message, "❌ Please configure your API key first in the Settings tab.")], ""
+    try:
+        # Build messages array
+        messages = []
+        # Add conversation history
+        for user_msg, assistant_msg in history:
+            messages.append({"role": "user", "content": user_msg})
+            if assistant_msg:
+                messages.append({"role": "assistant", "content": assistant_msg})
+        # Build current message content
+        content = []
+        # Add images if provided
+        if images:
+            for img in images:
+                if img is not None:
+                    # Determine MIME type
+                    img_path = Path(img)
+                    mime_type = "image/jpeg"
+                    if img_path.suffix.lower() in ['.png']:
+                        mime_type = "image/png"
+                    elif img_path.suffix.lower() in ['.webp']:
+                        mime_type = "image/webp"
+                    content.append(create_image_content(img, mime_type))
+        # Add text message
+        content.append({"type": "text", "text": message})
+        messages.append({"role": "user", "content": content})
+        # Prepare API call parameters
+        api_params = {
+            "model": "nvidia/nemotron-nano-12b-v2-vl:free",
+            "messages": messages,
+            "temperature": temperature,
+            "max_tokens": max_tokens
+        }
+        # Add reasoning if enabled
+        if enable_reasoning:
+            api_params["extra_body"] = {"reasoning": {"enabled": True}}
+        # Make API call
+        response = client.chat.completions.create(**api_params)
+        assistant_message = response.choices[0].message.content
+        # Extract reasoning if available
+        reasoning_text = ""
+        if enable_reasoning and hasattr(response.choices[0].message, 'reasoning_details'):
+            reasoning_details = response.choices[0].message.reasoning_details
+            if reasoning_details:
+                reasoning_text = f"\n\n**🧠 Reasoning Process:**\n{json.dumps(reasoning_details, indent=2)}"
+        # Update history
+        new_history = history + [(message, assistant_message)]
+        return new_history, reasoning_text
+    except Exception as e:
+        error_message = f"❌ Error: {str(e)}"
+        return history + [(message, error_message)], ""
+def clear_conversation():
+    """Clear conversation history"""
+    return [], ""
+# Custom CSS for premium design
+custom_css = """
+@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
+* {
+    font-family: 'Inter', sans-serif;
+}
+.gradio-container {
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
+}
+#main-container {
+    background: rgba(255, 255, 255, 0.98);
+    border-radius: 24px;
+    padding: 32px;
+    box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3);
+    backdrop-filter: blur(10px);
+}
+.header-title {
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    -webkit-background-clip: text;
+    -webkit-text-fill-color: transparent;
+    font-size: 3em;
+    font-weight: 700;
+    text-align: center;
+    margin-bottom: 0.3em;
+    letter-spacing: -0.02em;
+}
+.header-subtitle {
+    text-align: center;
+    color: #666;
+    font-size: 1.1em;
+    margin-bottom: 1.5em;
+    font-weight: 500;
+}
+.feature-badge {
+    display: inline-block;
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    color: white;
+    padding: 6px 16px;
+    border-radius: 20px;
+    font-size: 0.85em;
+    font-weight: 600;
+    margin: 4px;
+    box-shadow: 0 4px 12px rgba(102, 126, 234, 0.3);
+}
+.capability-card {
+    background: linear-gradient(135deg, #f6f8fb 0%, #ffffff 100%);
+    border: 2px solid #e0e7ff;
+    border-radius: 16px;
+    padding: 20px;
+    margin: 10px 0;
+    transition: all 0.3s ease;
+}
+.capability-card:hover {
+    transform: translateY(-4px);
+    box-shadow: 0 12px 24px rgba(102, 126, 234, 0.15);
+    border-color: #667eea;
+}
+.tab-nav button {
+    font-weight: 600 !important;
+    border-radius: 12px !important;
+    transition: all 0.3s ease !important;
+}
+.tab-nav button.selected {
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
+    color: white !important;
+}
+button.primary {
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
+    border: none !important;
+    color: white !important;
+    font-weight: 600 !important;
+    border-radius: 12px !important;
+    padding: 12px 32px !important;
+    transition: all 0.3s ease !important;
+    box-shadow: 0 4px 12px rgba(102, 126, 234, 0.3) !important;
+}
+button.primary:hover {
+    transform: translateY(-2px) !important;
+    box-shadow: 0 8px 20px rgba(102, 126, 234, 0.4) !important;
+}
+button.secondary {
+    background: white !important;
+    border: 2px solid #667eea !important;
+    color: #667eea !important;
+    font-weight: 600 !important;
+    border-radius: 12px !important;
+    transition: all 0.3s ease !important;
+}
+button.secondary:hover {
+    background: #f0f4ff !important;
+}
+.info-box {
+    background: linear-gradient(135deg, #e0e7ff 0%, #f0f4ff 100%);
+    border-left: 4px solid #667eea;
+    border-radius: 12px;
+    padding: 16px 20px;
+    margin: 16px 0;
+    font-size: 0.95em;
+    line-height: 1.6;
+}
+.success-box {
+    background: linear-gradient(135deg, #d4edda 0%, #e8f5e9 100%);
+    border-left: 4px solid #28a745;
+    border-radius: 12px;
+    padding: 16px 20px;
+    margin: 16px 0;
+    color: #155724;
+    font-weight: 500;
+}
+.chatbot {
+    border-radius: 16px !important;
+    border: 2px solid #e0e7ff !important;
+    box-shadow: 0 8px 24px rgba(102, 126, 234, 0.1) !important;
+}
+"""
+# Build Gradio Interface
+with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
+    gr.HTML("""
+        <div style='text-align: center; padding: 20px 0;'>
+            <h1 class='header-title'>🚀 Nemotron Nano VL Studio</h1>
+            <p class='header-subtitle'>Advanced Multimodal AI Assistant powered by NVIDIA Nemotron Nano 12B 2 VL</p>
+            <div style='margin: 20px 0;'>
+                <span class='feature-badge'>📊 Document Intelligence</span>
+                <span class='feature-badge'>🎬 Video Understanding</span>
+                <span class='feature-badge'>🧠 Reasoning Engine</span>
+                <span class='feature-badge'>📈 Chart Analysis</span>
+                <span class='feature-badge'>🔤 OCR Excellence</span>
+            </div>
+        </div>
+    """)
+    with gr.Row(elem_id="main-container"):
+        with gr.Column():
+            with gr.Tabs():
+                # Chat Tab
+                with gr.Tab("💬 Chat Interface", elem_classes=["tab-nav"]):
+                    gr.HTML("""
+                        <div class='info-box'>
+                            <strong>🎯 What can I do?</strong><br>
+                            • Analyze images, documents, and charts<br>
+                            • Perform OCR and text extraction<br>
+                            • Reason through complex problems<br>
+                            • Answer questions about visual content<br>
+                            • Process multi-image documents
+                        </div>
+                    """)
+                    chatbot = gr.Chatbot(
+                        label="Conversation",
+                        height=500,
+                        show_copy_button=True,
+                        avatar_images=(None, "https://www.nvidia.com/favicon.ico"),
+                        elem_classes=["chatbot"]
+                    )
+                    with gr.Row():
+                        msg = gr.Textbox(
+                            label="Your Message",
+                            placeholder="Ask me anything about images, documents, or reasoning tasks...",
+                            lines=3,
+                            scale=4
+                        )
+                    with gr.Row():
+                        images = gr.File(
+                            label="📎 Upload Images/Documents (Multi-image support)",
+                            file_count="multiple",
+                            file_types=["image"],
+                            scale=3
+                        )
+                    with gr.Row():
+                        submit_btn = gr.Button("🚀 Send", variant="primary", scale=2, elem_classes=["primary"])
+                        clear_btn = gr.Button("🗑️ Clear", variant="secondary", scale=1, elem_classes=["secondary"])
+                    reasoning_display = gr.Textbox(
+                        label="🧠 Reasoning Process (when enabled)",
+                        lines=6,
+                        interactive=False
+                    )
+                # Settings Tab
+                with gr.Tab("⚙️ Settings", elem_classes=["tab-nav"]):
+                    gr.HTML("""
+                        <div class='info-box'>
+                            <strong>🔑 API Configuration</strong><br>
+                            Get your free API key from <a href='https://openrouter.ai/keys' target='_blank'>OpenRouter</a>
+                        </div>
+                    """)
+                    api_key_input = gr.Textbox(
+                        label="OpenRouter API Key",
+                        placeholder="sk-or-v1-...",
+                        type="password",
+                        lines=1
+                    )
+                    api_status = gr.Textbox(label="Status", interactive=False)
+                    save_key_btn = gr.Button("💾 Save API Key", variant="primary", elem_classes=["primary"])
+                    gr.HTML("<hr style='margin: 30px 0; border: none; border-top: 2px solid #e0e7ff;'>")
+                    gr.HTML("""
+                        <div class='info-box'>
+                            <strong>🎛️ Model Parameters</strong><br>
+                            Fine-tune the model's behavior
+                        </div>
+                    """)
+                    enable_reasoning = gr.Checkbox(
+                        label="🧠 Enable Reasoning Mode",
+                        value=True,
+                        info="Show the model's step-by-step thinking process"
+                    )
+                    temperature = gr.Slider(
+                        minimum=0.0,
+                        maximum=2.0,
+                        value=0.7,
+                        step=0.1,
+                        label="🌡️ Temperature",
+                        info="Higher = more creative, Lower = more focused"
+                    )
+                    max_tokens = gr.Slider(
+                        minimum=256,
+                        maximum=4096,
+                        value=2000,
+                        step=256,
+                        label="📏 Max Tokens",
+                        info="Maximum length of response"
+                    )
+                # Examples Tab
+                with gr.Tab("📚 Examples & Capabilities", elem_classes=["tab-nav"]):
+                    gr.HTML("""
+                        <div class='capability-card'>
+                            <h3>📊 Document Intelligence</h3>
+                            <p><strong>Example:</strong> "Extract all the key metrics from this financial report"</p>
+                            <p>Nemotron excels at understanding complex documents, tables, and structured data.</p>
+                        </div>
+                        <div class='capability-card'>
+                            <h3>🔤 OCR Excellence</h3>
+                            <p><strong>Example:</strong> "What text appears in this image?"</p>
+                            <p>State-of-the-art optical character recognition for any text in images.</p>
+                        </div>
+                        <div class='capability-card'>
+                            <h3>📈 Chart & Graph Analysis</h3>
+                            <p><strong>Example:</strong> "What trends do you see in this chart?"</p>
+                            <p>Analyze charts, graphs, and data visualizations with high accuracy.</p>
+                        </div>
+                        <div class='capability-card'>
+                            <h3>🧠 Advanced Reasoning</h3>
+                            <p><strong>Example:</strong> "How many r's are in 'strawberry'? Think step by step."</p>
+                            <p>Transparent reasoning process shows how the model arrives at answers.</p>
+                        </div>
+                        <div class='capability-card'>
+                            <h3>🎬 Video Understanding</h3>
+                            <p><strong>Example:</strong> Upload video frames and ask "What's happening in this sequence?"</p>
+                            <p>Process multiple frames to understand temporal sequences and events.</p>
+                        </div>
+                        <div class='capability-card'>
+                            <h3>📑 Multi-Image Documents</h3>
+                            <p><strong>Example:</strong> Upload multiple pages and ask "Summarize this document"</p>
+                            <p>Handle multi-page documents and complex layouts with ease.</p>
+                        </div>
+                    """)
+                    gr.HTML("""
+                        <div class='success-box' style='margin-top: 30px;'>
+                            <strong>💡 Pro Tips:</strong><br>
+                            • Upload multiple images for document analysis<br>
+                            • Enable reasoning mode for complex problems<br>
+                            • Adjust temperature for creative vs precise outputs<br>
+                            • Use specific questions for better OCR results<br>
+                            • Try video frame sequences for temporal analysis
+                        </div>
+                    """)
+                # About Tab
+                with gr.Tab("ℹ️ About", elem_classes=["tab-nav"]):
+                    gr.Markdown("""
+                    # 🚀 About Nemotron Nano 12B 2 VL
+                    ## 🎯 Model Overview
+                    **NVIDIA Nemotron Nano 2 VL** is a cutting-edge 12-billion-parameter open multimodal reasoning model
+                    designed for video understanding and document intelligence.
+                    ## ✨ Key Features
+                    - **🏗️ Hybrid Architecture**: Combines Transformer accuracy with Mamba's efficient sequence modeling
+                    - **⚡ High Performance**: Superior throughput and lower latency
+                    - **📊 Leading Results**: ~74 average score across major benchmarks
+                    - **🎯 Specialized Training**: NVIDIA-curated synthetic datasets
+                    - **🎬 Video Support**: Efficient Video Sampling (EVS) for long-form content
+                    - **📖 Open Source**: Released under permissive NVIDIA open license
+                    ## 📈 Benchmark Performance
+                    Achieves leading results on:
+                    - OCRBench v2
+                    - MMMU
+                    - MathVista
+                    - AI2D
+                    - OCR-Reasoning
+                    - ChartQA
+                    - DocVQA
+                    - Video-MME
+                    ## 🔧 Deployment
+                    Supported across:
+                    - NVIDIA NeMo
+                    - NVIDIA NIM
+                    - Major inference runtimes
+                    ## 🌐 Learn More
+                    - [OpenRouter API](https://openrouter.ai/)
+                    - [NVIDIA NeMo](https://www.nvidia.com/en-us/ai-data-science/products/nemo/)
+                    ---
+                    <div style='text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 12px; color: white;'>
+                        <strong>Built with ❤️ using Gradio and powered by NVIDIA Nemotron</strong>
+                    </div>
+                    """)
+    # Event Handlers
+    save_key_btn.click(
+        fn=initialize_client,
+        inputs=[api_key_input],
+        outputs=[api_status]
+    )
     submit_btn.click(
+        fn=process_message,
+        inputs=[msg, chatbot, images, enable_reasoning, temperature, max_tokens],
+        outputs=[chatbot, reasoning_display]
     ).then(
+        lambda: ("", None),
+        outputs=[msg, images]
     )
+    msg.submit(
+        fn=process_message,
+        inputs=[msg, chatbot, images, enable_reasoning, temperature, max_tokens],
+        outputs=[chatbot, reasoning_display]
     ).then(
+        lambda: ("", None),
+        outputs=[msg, images]
+    )
+    clear_btn.click(
+        fn=clear_conversation,
+        outputs=[chatbot, reasoning_display]
     )
+# Launch the app
 if __name__ == "__main__":
+    app.launch(
+        share=True,
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_error=True
     )