Spaces:

shukdevdattaEX
/

NemoVision

Paused

App Files Files Community

shukdevdattaEX committed on Dec 26, 2025

Commit

b7e74ea

verified ·

1 Parent(s): 2b5cbea

Update app.py

Browse files

Files changed (1) hide show

app.py +289 -181

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ from typing import List, Tuple, Optional
 import time
 from PIL import Image
 import io
 # Global client variable
 client = None
@@ -31,59 +32,132 @@ def encode_image(image_path: str) -> str:
     with open(image_path, "rb") as image_file:
         return base64.b64encode(image_file.read()).decode('utf-8')
-def pdf_to_images(pdf_path: str) -> List[Image.Image]:
-    """Convert PDF to images using pdf2image"""
     try:
         from pdf2image import convert_from_path
         images = convert_from_path(pdf_path, dpi=200)
         return images
-    except ImportError:
-        # If pdf2image is not available, try PyMuPDF (fitz)
         try:
-            import fitz
-            doc = fitz.open(pdf_path)
-            images = []
-            for page_num in range(len(doc)):
-                page = doc[page_num]
-                pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
-                img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
-                images.append(img)
-            doc.close()
-            return images
-        except ImportError:
-            raise Exception("Please install pdf2image or PyMuPDF: pip install pdf2image PyMuPDF")
 def image_to_base64(image: Image.Image, format: str = "PNG") -> str:
     """Convert PIL Image to base64"""
     buffered = io.BytesIO()
-    image.save(buffered, format=format)
     return base64.b64encode(buffered.getvalue()).decode('utf-8')
-def process_file(file_path: str) -> List[dict]:
-    """Process a file and return content blocks for API"""
     file_extension = Path(file_path).suffix.lower()
     content_blocks = []
     try:
         if file_extension == '.pdf':
             # Convert PDF pages to images
-            images = pdf_to_images(file_path)
-            for img in images:
-                base64_image = image_to_base64(img, format="PNG")
                 content_blocks.append({
                     "type": "image_url",
                     "image_url": {
-                        "url": f"data:image/png;base64,{base64_image}"
                     }
                 })
         elif file_extension == '.txt':
             # Read text file
-            with open(file_path, 'r', encoding='utf-8') as f:
-                text_content = f.read()
             content_blocks.append({
                 "type": "text",
-                "text": f"[Text File Content]:\n{text_content}"
             })
         else:
             # Handle image files
             # Determine MIME type
@@ -99,20 +173,51 @@ def process_file(file_path: str) -> List[dict]:
             elif file_extension in ['.tiff', '.tif']:
                 mime_type = "image/tiff"
-            base64_image = encode_image(file_path)
-            content_blocks.append({
-                "type": "image_url",
-                "image_url": {
-                    "url": f"data:{mime_type};base64,{base64_image}"
-                }
-            })
     except Exception as e:
         content_blocks.append({
             "type": "text",
-            "text": f"[Error processing file {Path(file_path).name}: {str(e)}]"
         })
-    return content_blocks
 def process_message(
     message: str,
@@ -121,15 +226,20 @@ def process_message(
     enable_reasoning: bool = True,
     temperature: float = 0.7,
     max_tokens: int = 2000
-) -> Tuple[List[Tuple[str, str]], str]:
-    """Process user message and generate response"""
     global client
     if client is None:
-        return history + [(message, "❌ Please configure your API key first in the Settings tab.")], ""
     if not message.strip() and not files:
-        return history + [(message, "⚠️ Please enter a message or upload files.")], ""
     try:
         # Build messages array
@@ -147,17 +257,28 @@ def process_message(
         # Process files if provided
         if files:
             file_count = 0
             for file in files:
                 if file is not None:
-                    file_blocks = process_file(file)
                     content.extend(file_blocks)
                     file_count += 1
             if file_count > 0:
-                content.insert(0, {
-                    "type": "text",
-                    "text": f"[{file_count} file(s) uploaded]"
-                })
         # Add text message
         if message.strip():
@@ -187,20 +308,71 @@ def process_message(
         if enable_reasoning and hasattr(response.choices[0].message, 'reasoning_details'):
             reasoning_details = response.choices[0].message.reasoning_details
             if reasoning_details:
-                reasoning_text = f"\n\n**🧠 Reasoning Process:**\n{json.dumps(reasoning_details, indent=2)}"
         # Update history
         new_history = history + [(message, assistant_message)]
-        return new_history, reasoning_text
     except Exception as e:
         error_message = f"❌ Error: {str(e)}"
-        return history + [(message, error_message)], ""
 def clear_conversation():
     """Clear conversation history"""
-    return [], ""
 # Custom CSS for premium design
 custom_css = """
@@ -375,6 +547,13 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
                         elem_classes=["chatbot"]
                     )
                     with gr.Row():
                         msg = gr.Textbox(
                             label="Your Message",
@@ -454,11 +633,31 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
                         info="Maximum length of response"
                     )
                     gr.HTML("""
                         <div class='info-box' style='margin-top: 20px;'>
-                            <strong>📦 Required Dependencies for PDF Support:</strong><br>
-                            <code>pip install pdf2image PyMuPDF pillow</code><br><br>
-                            <strong>Note:</strong> pdf2image also requires poppler-utils installed on your system.
                         </div>
                     """)
@@ -512,7 +711,7 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
                                     <p style='margin: 10px 0 0 0; color: #666; line-height: 1.6;'>
                                         • Multi-page support<br>
                                         • Automatic conversion to images<br>
-                                        • Layout analysis<br>
                                         • Scanned documents<br>
                                         • Forms and tables
                                     </p>
@@ -523,138 +722,42 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
                                         • Plain text documents<br>
                                         • Code snippets<br>
                                         • Notes and logs<br>
-                                        • Data files<br>
                                         • Configuration files
                                     </p>
                                 </div>
                             </div>
                         </div>
-                        <div style='margin-top: 30px; text-align: center;'>
-                            <h2 style='color: #667eea; font-size: 2em; margin-bottom: 20px;'>🎯 What the Model Excels At</h2>
-                        </div>
-                        <div class='capability-card' style='background: linear-gradient(135deg, #e8f5e9 0%, #f1f8e9 100%);'>
-                            <h3 style='color: #2e7d32;'>📊 1. Document Intelligence</h3>
-                            <div style='background: white; padding: 20px; border-radius: 10px; margin-top: 15px;'>
-                                <ul style='color: #666; line-height: 2; margin: 0; padding-left: 20px;'>
-                                    <li><strong>Multi-page PDF analysis</strong> - Process entire documents at once</li>
-                                    <li><strong>Scanned documents</strong> - Extract text from scans and photos of documents</li>
-                                    <li><strong>Forms and tables</strong> - Understand structured data layouts</li>
-                                    <li><strong>Financial reports</strong> - Parse complex financial documents</li>
-                                    <li><strong>Receipts and invoices</strong> - Extract itemized information</li>
-                                    <li><strong>Academic papers</strong> - Understand scientific content and citations</li>
-                                </ul>
-                            </div>
-                        </div>
-                        <div class='capability-card' style='background: linear-gradient(135deg, #fff9c4 0%, #fff3e0 100%);'>
-                            <h3 style='color: #f57f17;'>🔤 2. OCR Excellence (Optical Character Recognition)</h3>
-                            <div style='background: white; padding: 20px; border-radius: 10px; margin-top: 15px;'>
-                                <ul style='color: #666; line-height: 2; margin: 0; padding-left: 20px;'>
-                                    <li><strong>Handwritten text</strong> - Recognize cursive and printed handwriting</li>
-                                    <li><strong>Printed text</strong> - Extract text from any printed material</li>
-                                    <li><strong>Text in images</strong> - Find and read text embedded in photos</li>
-                                    <li><strong>Multi-language support</strong> - Handle various languages and scripts</li>
-                                    <li><strong>Low-quality images</strong> - Work with blurry or low-resolution scans</li>
-                                    <li><strong>Complex layouts</strong> - Handle multi-column and mixed layouts</li>
-                                </ul>
-                            </div>
-                        </div>
-                        <div class='capability-card' style='background: linear-gradient(135deg, #e1bee7 0%, #f3e5f5 100%);'>
-                            <h3 style='color: #6a1b9a;'>📈 3. Chart & Graph Analysis</h3>
-                            <div style='background: white; padding: 20px; border-radius: 10px; margin-top: 15px;'>
-                                <ul style='color: #666; line-height: 2; margin: 0; padding-left: 20px;'>
-                                    <li><strong>Bar charts</strong> - Interpret categorical data comparisons</li>
-                                    <li><strong>Line graphs</strong> - Analyze trends over time</li>
-                                    <li><strong>Pie charts</strong> - Understand proportional distributions</li>
-                                    <li><strong>Scatter plots</strong> - Identify correlations and patterns</li>
-                                    <li><strong>Complex visualizations</strong> - Parse multi-axis and combined charts</li>
-                                    <li><strong>Infographics</strong> - Extract insights from visual data stories</li>
-                                </ul>
-                            </div>
-                        </div>
-                        <div class='capability-card' style='background: linear-gradient(135deg, #b3e5fc 0%, #e1f5fe 100%);'>
-                            <h3 style='color: #01579b;'>🎬 4. Video Understanding (Frame-by-Frame)</h3>
-                            <div style='background: white; padding: 20px; border-radius: 10px; margin-top: 15px;'>
-                                <ul style='color: #666; line-height: 2; margin: 0; padding-left: 20px;'>
-                                    <li><strong>Sequential frames</strong> - Upload multiple frames from videos</li>
-                                    <li><strong>Action recognition</strong> - Understand what's happening across frames</li>
-                                    <li><strong>Temporal analysis</strong> - Track changes over time</li>
-                                    <li><strong>Scene understanding</strong> - Comprehend context and setting</li>
-                                    <li><strong>Object tracking</strong> - Follow objects across frames</li>
-                                    <li><strong>Event detection</strong> - Identify key moments in sequences</li>
-                                </ul>
-                            </div>
-                        </div>
-                        <div class='capability-card' style='background: linear-gradient(135deg, #ffccbc 0%, #ffe0b2 100%);'>
-                            <h3 style='color: #bf360c;'>📑 5. Multi-Image Document Processing</h3>
-                            <div style='background: white; padding: 20px; border-radius: 10px; margin-top: 15px;'>
-                                <ul style='color: #666; line-height: 2; margin: 0; padding-left: 20px;'>
-                                    <li><strong>Multiple pages at once</strong> - Upload and analyze entire documents</li>
-                                    <li><strong>Cross-reference</strong> - Connect information across different images</li>
-                                    <li><strong>Document comparison</strong> - Compare versions or similar documents</li>
-                                    <li><strong>Batch processing</strong> - Handle multiple documents simultaneously</li>
-                                    <li><strong>Presentation slides</strong> - Understand slide decks and flow</li>
-                                    <li><strong>Comic books/Manga</strong> - Follow visual narratives</li>
-                                </ul>
-                            </div>
-                        </div>
-                        <div class='capability-card' style='background: linear-gradient(135deg, #c5e1a5 0%, #dcedc8 100%);'>
-                            <h3 style='color: #33691e;'>🧠 6. Advanced Reasoning</h3>
-                            <div style='background: white; padding: 20px; border-radius: 10px; margin-top: 15px;'>
-                                <ul style='color: #666; line-height: 2; margin: 0; padding-left: 20px;'>
-                                    <li><strong>Step-by-step thinking</strong> - See the model's reasoning process</li>
-                                    <li><strong>Mathematical problems</strong> - Solve complex math with visual elements</li>
-                                    <li><strong>Logical deduction</strong> - Draw conclusions from visual evidence</li>
-                                    <li><strong>Problem decomposition</strong> - Break down complex questions</li>
-                                    <li><strong>Visual reasoning</strong> - Understand spatial and logical relationships</li>
-                                    <li><strong>Transparent thinking</strong> - Explain how conclusions are reached</li>
-                                </ul>
-                            </div>
-                        </div>
-                        <div class='success-box' style='margin-top: 30px; font-size: 1.05em;'>
-                            <strong>💡 Pro Tips for Best Results:</strong><br><br>
-                            ✅ <strong>High-quality images</strong> - Use clear, well-lit photos for better OCR<br>
-                            ✅ <strong>Multiple angles</strong> - Upload different views for complex objects<br>
-                            ✅ <strong>Specific questions</strong> - Ask targeted questions for precise answers<br>
-                            ✅ <strong>Enable reasoning</strong> - Turn on reasoning mode for complex analysis<br>
-                            ✅ <strong>Sequential order</strong> - Upload video frames in chronological order<br>
-                            ✅ <strong>Context matters</strong> - Provide background information for better understanding
-                        </div>
-                        <div style='background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 25px; border-radius: 16px; text-align: center; margin-top: 30px;'>
-                            <h3 style='margin: 0 0 10px 0; font-size: 1.5em;'>🚀 Ready to Get Started?</h3>
-                            <p style='margin: 0; font-size: 1.1em; opacity: 0.95;'>
-                                Upload your files in the Chat Interface tab and experience the power of Nemotron Nano 2 VL!
-                            </p>
                         </div>
                     """)
-                # Examples Tab
-                with gr.Tab("📚 Examples & Capabilities", elem_classes=["tab-nav"]):
                     gr.HTML("""
                         <div class='capability-card'>
-                            <h3>📊 Document Intelligence</h3>
                             <p><strong>Example:</strong> "Extract all the key metrics from this financial report"</p>
-                            <p>Nemotron excels at understanding complex documents, tables, and structured data.</p>
                         </div>
                         <div class='capability-card'>
-                            <h3>🔤 OCR Excellence</h3>
-                            <p><strong>Example:</strong> "What text appears in this image?"</p>
-                            <p>State-of-the-art optical character recognition for any text in images.</p>
                         </div>
                         <div class='capability-card'>
-                            <h3>📈 Chart & Graph Analysis</h3>
-                            <p><strong>Example:</strong> "What trends do you see in this chart?"</p>
-                            <p>Analyze charts, graphs, and data visualizations with high accuracy.</p>
                         </div>
                         <div class='capability-card'>
@@ -664,26 +767,26 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
                         </div>
                         <div class='capability-card'>
-                            <h3>🎬 Video Understanding</h3>
-                            <p><strong>Example:</strong> Upload video frames and ask "What's happening in this sequence?"</p>
-                            <p>Process multiple frames to understand temporal sequences and events.</p>
                         </div>
                         <div class='capability-card'>
-                            <h3>📑 Multi-Image Documents</h3>
-                            <p><strong>Example:</strong> Upload multiple pages and ask "Summarize this document"</p>
-                            <p>Handle multi-page documents and complex layouts with ease.</p>
                         </div>
                     """)
                     gr.HTML("""
                         <div class='success-box' style='margin-top: 30px;'>
                             <strong>💡 Pro Tips:</strong><br>
-                            • Upload multiple images for document analysis<br>
-                            • Enable reasoning mode for complex problems<br>
-                            • Adjust temperature for creative vs precise outputs<br>
-                            • Use specific questions for better OCR results<br>
-                            • Try video frame sequences for temporal analysis
                         </div>
                     """)
@@ -744,10 +847,15 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
         outputs=[api_status]
     )
     submit_btn.click(
         fn=process_message,
         inputs=[msg, chatbot, files, enable_reasoning, temperature, max_tokens],
-        outputs=[chatbot, reasoning_display]
     ).then(
         lambda: ("", None),
         outputs=[msg, files]
@@ -756,7 +864,7 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
     msg.submit(
         fn=process_message,
         inputs=[msg, chatbot, files, enable_reasoning, temperature, max_tokens],
-        outputs=[chatbot, reasoning_display]
     ).then(
         lambda: ("", None),
         outputs=[msg, files]
@@ -764,11 +872,11 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
     clear_btn.click(
         fn=clear_conversation,
-        outputs=[chatbot, reasoning_display]
     )
 # Launch the app
 if __name__ == "__main__":
     app.launch(
-        share=True,
     )

 import time
 from PIL import Image
 import io
+import sys
 # Global client variable
 client = None
     with open(image_path, "rb") as image_file:
         return base64.b64encode(image_file.read()).decode('utf-8')
def pdf_to_images_pymupdf(pdf_path: str) -> List[Image.Image]:
    """Render every page of a PDF to a PIL image using PyMuPDF.

    Each page is rasterised with a 2x zoom matrix and wrapped in an RGB
    ``PIL.Image``.

    Args:
        pdf_path: Path of the PDF file to render.

    Returns:
        One RGB image per page, in document order.

    Raises:
        Exception: Any failure (PyMuPDF missing, unreadable file, render
            error) is re-raised with a ``"PyMuPDF error: "`` prefix; the
            original exception is attached as ``__cause__``.
    """
    try:
        # Imported lazily so a missing PyMuPDF surfaces as a wrapped error
        # from this function rather than at module import time.
        import fitz  # PyMuPDF

        doc = fitz.open(pdf_path)
        try:
            # 2x zoom on both axes for better quality than the default render.
            zoom = fitz.Matrix(2, 2)
            images = []
            for page in doc:
                # alpha=False keeps the sample buffer at 3 bytes per pixel,
                # which is what the "RGB" mode below expects.
                pix = page.get_pixmap(matrix=zoom, alpha=False)
                images.append(
                    Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
                )
            return images
        finally:
            # Release the file handle even when a page fails to render.
            doc.close()
    except Exception as e:
        raise Exception(f"PyMuPDF error: {str(e)}") from e
def pdf_to_images_pdf2image(pdf_path: str) -> List[Image.Image]:
    """Render every page of a PDF to a PIL image using pdf2image.

    Pages are converted at 200 DPI. pdf2image relies on the external
    poppler tools being installed.

    Args:
        pdf_path: Path of the PDF file to render.

    Returns:
        The list of page images produced by ``convert_from_path``.

    Raises:
        Exception: Any failure (pdf2image missing, poppler missing,
            unreadable file) is re-raised with a ``"pdf2image error: "``
            prefix; the original exception is attached as ``__cause__``.
    """
    try:
        # Imported lazily so a missing pdf2image is reported by this
        # function instead of breaking module import.
        from pdf2image import convert_from_path

        return convert_from_path(pdf_path, dpi=200)
    except Exception as e:
        raise Exception(f"pdf2image error: {str(e)}") from e
def pdf_to_images(pdf_path: str) -> Tuple[List[Image.Image], str]:
    """Convert a PDF to page images, trying PyMuPDF first and pdf2image second.

    Args:
        pdf_path: Path of the PDF file to convert.

    Returns:
        A ``(images, method)`` pair where ``method`` is ``"PyMuPDF"`` or
        ``"pdf2image"``, naming the backend that succeeded.

    Raises:
        Exception: When both backends fail. The message lists both
            underlying errors followed by installation instructions, and
            the pdf2image failure is attached as ``__cause__``.
    """
    # PyMuPDF goes first because it does not require poppler.
    try:
        return pdf_to_images_pymupdf(pdf_path), "PyMuPDF"
    except Exception as e1:
        # Keep only the text: the exception name is unbound once the
        # handler exits.
        pymupdf_error = str(e1)

    # Fallback backend.
    try:
        return pdf_to_images_pdf2image(pdf_path), "pdf2image"
    except Exception as e2:
        pdf2image_error = str(e2)
        # Both methods failed: report both causes plus how to fix them.
        error_msg = f"""PDF conversion failed. Tried multiple methods:
1. PyMuPDF: {pymupdf_error}
2. pdf2image: {pdf2image_error}
SOLUTION:
Install PyMuPDF (recommended - no external dependencies):
  pip install PyMuPDF
OR install pdf2image + poppler:
  pip install pdf2image
  Then install poppler:
  - Ubuntu/Debian: sudo apt-get install poppler-utils
  - macOS: brew install poppler
  - Windows: Download from https://github.com/oschwartz10612/poppler-windows/releases/
"""
        raise Exception(error_msg) from e2
 def image_to_base64(image: Image.Image, format: str = "PNG") -> str:
     """Encode a PIL image as a base64 string of the requested file format.

     The image is always normalised to RGB before saving. Images that carry
     transparency (``RGBA``, ``LA``, or palette images with a
     ``transparency`` entry) are composited onto a white background so
     transparent areas do not come out with arbitrary colours; every other
     non-RGB mode is converted directly.

     Args:
         image: The image to encode.
         format: Pillow format name passed to ``Image.save``.

     Returns:
         The saved image bytes, base64-encoded and decoded as UTF-8 text.
     """
     has_alpha = image.mode in ("RGBA", "LA") or (
         image.mode == "P" and "transparency" in image.info
     )
     if has_alpha:
         # Go through RGBA so one alpha-mask paste covers all three cases.
         rgba = image.convert("RGBA")
         background = Image.new("RGB", rgba.size, (255, 255, 255))
         background.paste(rgba, mask=rgba.split()[-1])
         image = background
     elif image.mode != "RGB":
         image = image.convert("RGB")

     buffered = io.BytesIO()
     image.save(buffered, format=format, quality=95)
     return base64.b64encode(buffered.getvalue()).decode("utf-8")
+def process_file(file_path: str) -> Tuple[List[dict], str]:
+    """
+    Process a file and return content blocks for API
+    Returns: (content_blocks, status_message)
+    """
     file_extension = Path(file_path).suffix.lower()
+    file_name = Path(file_path).name
     content_blocks = []
+    status_message = ""
     try:
         if file_extension == '.pdf':
             # Convert PDF pages to images
+            images, method = pdf_to_images(file_path)
+            status_message = f"✅ PDF '{file_name}' converted to {len(images)} page(s) using {method}"
+            for idx, img in enumerate(images, 1):
+                base64_image = image_to_base64(img, format="JPEG")
                 content_blocks.append({
                     "type": "image_url",
                     "image_url": {
+                        "url": f"data:image/jpeg;base64,{base64_image}"
                     }
                 })
         elif file_extension == '.txt':
             # Read text file
+            try:
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    text_content = f.read()
+            except UnicodeDecodeError:
+                # Try with different encoding
+                with open(file_path, 'r', encoding='latin-1') as f:
+                    text_content = f.read()
+            status_message = f"✅ Text file '{file_name}' loaded ({len(text_content)} characters)"
             content_blocks.append({
                 "type": "text",
+                "text": f"📄 Content from '{file_name}':\n\n{text_content}"
             })
         else:
             # Handle image files
             # Determine MIME type
             elif file_extension in ['.tiff', '.tif']:
                 mime_type = "image/tiff"
+            # Load and potentially convert the image
+            try:
+                img = Image.open(file_path)
+                # Convert to RGB if necessary
+                if img.mode in ('RGBA', 'LA', 'P'):
+                    background = Image.new('RGB', img.size, (255, 255, 255))
+                    if img.mode == 'P':
+                        img = img.convert('RGBA')
+                    if img.mode in ('RGBA', 'LA'):
+                        background.paste(img, mask=img.split()[-1] if img.mode in ('RGBA', 'LA') else None)
+                    img = background
+                elif img.mode != 'RGB':
+                    img = img.convert('RGB')
+                # Convert to base64
+                base64_image = image_to_base64(img, format="JPEG")
+                status_message = f"✅ Image '{file_name}' loaded ({img.width}x{img.height})"
+                content_blocks.append({
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/jpeg;base64,{base64_image}"
+                    }
+                })
+            except Exception as img_error:
+                # If image processing fails, try direct base64 encoding
+                base64_image = encode_image(file_path)
+                status_message = f"✅ Image '{file_name}' loaded (direct encoding)"
+                content_blocks.append({
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:{mime_type};base64,{base64_image}"
+                    }
+                })
     except Exception as e:
+        error_msg = f"❌ Error processing '{file_name}': {str(e)}"
         content_blocks.append({
             "type": "text",
+            "text": error_msg
         })
+        status_message = error_msg
+    return content_blocks, status_message
 def process_message(
     message: str,
     enable_reasoning: bool = True,
     temperature: float = 0.7,
     max_tokens: int = 2000
+) -> Tuple[List[Tuple[str, str]], str, str]:
+    """
+    Process user message and generate response
+    Returns: (updated_history, reasoning_text, status_message)
+    """
     global client
     if client is None:
+        return history + [(message, "❌ Please configure your API key first in the Settings tab.")], "", ""
     if not message.strip() and not files:
+        return history + [(message, "⚠️ Please enter a message or upload files.")], "", ""
+    status_messages = []
     try:
         # Build messages array
         # Process files if provided
         if files:
             file_count = 0
+            total_pages = 0
             for file in files:
                 if file is not None:
+                    file_blocks, status = process_file(file)
                     content.extend(file_blocks)
+                    status_messages.append(status)
                     file_count += 1
+                    # Count pages for PDFs
+                    if status.startswith("✅") and "page(s)" in status:
+                        try:
+                            pages = int(status.split("converted to ")[1].split(" page(s)")[0])
+                            total_pages += pages
+                        except:
+                            pass
             if file_count > 0:
+                file_summary = f"📎 {file_count} file(s) uploaded"
+                if total_pages > 0:
+                    file_summary += f" ({total_pages} PDF pages)"
+                content.insert(0, {"type": "text", "text": file_summary})
         # Add text message
         if message.strip():
         if enable_reasoning and hasattr(response.choices[0].message, 'reasoning_details'):
             reasoning_details = response.choices[0].message.reasoning_details
             if reasoning_details:
+                reasoning_text = f"**🧠 Reasoning Process:**\n{json.dumps(reasoning_details, indent=2)}"
         # Update history
         new_history = history + [(message, assistant_message)]
+        # Combine status messages
+        combined_status = "\n".join(status_messages) if status_messages else "✅ Message processed successfully"
+        return new_history, reasoning_text, combined_status
     except Exception as e:
         error_message = f"❌ Error: {str(e)}"
+        return history + [(message, error_message)], "", error_message
 def clear_conversation():
     """Return the blank state used to reset the conversation.

     Yields an empty history list followed by two empty strings; the
     strings presumably clear the reasoning and status outputs (the event
     wiring is not visible here, so confirm against the caller).
     """
     empty_history = []
     blank = ""
     return empty_history, blank, blank
def check_dependencies() -> str:
    """Build a Markdown report of which PDF-processing libraries are usable.

    Tries to import PyMuPDF (``fitz``), pdf2image and Pillow. When
    pdf2image is importable, it also looks for the poppler command-line
    tools on ``PATH``, since pdf2image cannot convert anything without them.

    Returns:
        A Markdown-formatted status string with one section per library and
        a closing installation recommendation.
    """
    # Local import: only needed for the poppler lookup below.
    import shutil

    lines = ["**📦 PDF Processing Dependencies Status:**\n\n"]

    # Check PyMuPDF
    try:
        import fitz  # noqa: F401
    except ImportError:
        lines.append("❌ **PyMuPDF (fitz)**: Not installed\n")
        lines.append("   - Install: `pip install PyMuPDF`\n\n")
    else:
        lines.append("✅ **PyMuPDF (fitz)**: Installed and ready!\n")
        lines.append("   - No external dependencies needed\n")
        lines.append("   - This is the primary PDF processing method\n\n")

    # Check pdf2image
    try:
        import pdf2image  # noqa: F401
    except ImportError:
        lines.append("⚠️ **pdf2image**: Not installed (optional fallback)\n")
        lines.append("   - Install: `pip install pdf2image`\n\n")
    else:
        lines.append("✅ **pdf2image**: Installed\n")
        lines.append("   - Requires poppler-utils (external)\n")
        # Importing pdf2image succeeds even without poppler, so look for
        # the poppler executables on PATH to get a real answer.
        if shutil.which("pdftoppm") and shutil.which("pdfinfo"):
            lines.append("   - ✅ poppler-utils found on PATH\n")
        else:
            lines.append("   - ⚠️ poppler-utils may not be installed\n")
        lines.append("\n")

    # Check PIL/Pillow
    try:
        from PIL import Image  # noqa: F401
    except ImportError:
        lines.append("❌ **Pillow (PIL)**: Not installed\n")
        lines.append("   - Install: `pip install Pillow`\n\n")
    else:
        lines.append("✅ **Pillow (PIL)**: Installed and ready!\n\n")

    lines.append("**💡 Recommendation:**\n")
    lines.append("Install PyMuPDF for the best PDF support:\n")
    lines.append("`pip install PyMuPDF Pillow`")
    return "".join(lines)
 # Custom CSS for premium design
 custom_css = """
                         elem_classes=["chatbot"]
                     )
+                    file_status = gr.Textbox(
+                        label="📋 File Processing Status",
+                        lines=2,
+                        interactive=False,
+                        visible=True
+                    )
                     with gr.Row():
                         msg = gr.Textbox(
                             label="Your Message",
                         info="Maximum length of response"
                     )
+                    gr.HTML("<hr style='margin: 30px 0; border: none; border-top: 2px solid #e0e7ff;'>")  # visual divider before the dependency-check section
+                    gr.HTML("""
+                        <div class='info-box'>
+                            <strong>📦 Check Dependencies</strong><br>
+                            Verify that PDF processing libraries are installed
+                        </div>
+                    """)
+                    check_deps_btn = gr.Button("🔍 Check Dependencies", variant="secondary", elem_classes=["secondary"])  # click handler is wired to check_dependencies further down
+                    deps_status = gr.Markdown(label="Dependency Status")  # receives the report string returned by check_dependencies
                     # Static installation guide: pip commands for the PyMuPDF and pdf2image
                     # routes, plus per-OS instructions for installing poppler.
                     gr.HTML("""
                         <div class='info-box' style='margin-top: 20px;'>
+                            <strong>📦 Installation Guide:</strong><br><br>
+                            <strong>Recommended (PyMuPDF - No external dependencies):</strong><br>
+                            <code>pip install PyMuPDF Pillow openai gradio</code><br><br>
+                            <strong>Alternative (pdf2image - Requires poppler):</strong><br>
+                            <code>pip install pdf2image Pillow openai gradio</code><br><br>
+                            <strong>Poppler installation (for pdf2image):</strong><br>
+                            • Ubuntu/Debian: <code>sudo apt-get install poppler-utils</code><br>
+                            • macOS: <code>brew install poppler</code><br>
+                            • Windows: Download from <a href="https://github.com/oschwartz10612/poppler-windows/releases/" target="_blank">GitHub</a>
                         </div>
                     """)
                                     <p style='margin: 10px 0 0 0; color: #666; line-height: 1.6;'>
                                         • Multi-page support<br>
                                         • Automatic conversion to images<br>
+                                        • PyMuPDF (recommended)<br>
                                         • Scanned documents<br>
                                         • Forms and tables
                                     </p>
                                         • Plain text documents<br>
                                         • Code snippets<br>
                                         • Notes and logs<br>
+                                        • UTF-8 encoding<br>
                                         • Configuration files
                                     </p>
                                 </div>
                             </div>
                         </div>
+                        <div class='success-box' style='margin-top: 20px;'>
+                            <strong>🚀 PDF Processing:</strong><br>
+                            This app uses <strong>PyMuPDF (fitz)</strong> as the primary method for PDF conversion.<br>
+                            • ✅ No external dependencies (no poppler needed)<br>
+                            • ✅ Fast and reliable<br>
+                            • ✅ Automatic fallback to pdf2image if needed<br>
+                            • ✅ Clear error messages with installation instructions
                         </div>
                     """)
+                # Use Cases tab: example prompts for each capability
+                with gr.Tab("📚 Use Cases", elem_classes=["tab-nav"]):
                     gr.HTML("""
                         <div class='capability-card'>
+                            <h3>📊 Financial Report Analysis</h3>
                             <p><strong>Example:</strong> "Extract all the key metrics from this financial report"</p>
+                            <p><strong>What it extracts:</strong> Revenue, Net Profit, EBITDA, Cash Flow, Assets, Liabilities, Ratios, YoY Growth</p>
                         </div>
                         <div class='capability-card'>
+                            <h3>🔤 OCR & Text Extraction</h3>
+                            <p><strong>Example:</strong> "What text appears in this scanned document?"</p>
+                            <p>State-of-the-art optical character recognition for any text in images or PDFs.</p>
                         </div>
                         <div class='capability-card'>
+                            <h3>📈 Chart & Data Visualization</h3>
+                            <p><strong>Example:</strong> "Analyze the trends in these charts"</p>
+                            <p>Understand bar charts, line graphs, pie charts, scatter plots, and complex visualizations.</p>
                         </div>
                         <div class='capability-card'>
                         </div>
                         <div class='capability-card'>
+                            <h3>📑 Multi-Page Documents</h3>
+                            <p><strong>Example:</strong> Upload a PDF and ask "Summarize the key points from all pages"</p>
+                            <p>Process entire documents with multiple pages simultaneously.</p>
                         </div>
                         <div class='capability-card'>
+                            <h3>🏢 Business Document Processing</h3>
+                            <p><strong>Example:</strong> "Extract information from this invoice/receipt/form"</p>
+                            <p>Handle invoices, receipts, forms, contracts, and structured business documents.</p>
                         </div>
                     """)
                     # Static "Pro Tips" box: usage advice shown as plain HTML (no event wiring).
                     gr.HTML("""
                         <div class='success-box' style='margin-top: 30px;'>
                             <strong>💡 Pro Tips:</strong><br>
+                            • Upload high-quality scans for best OCR results<br>
+                            • Enable reasoning mode for complex financial analysis<br>
+                            • Ask specific questions to get targeted information<br>
+                            • Upload multiple related documents for comparison<br>
+                            • Use clear, descriptive questions for better answers
                         </div>
                     """)
         outputs=[api_status]
     )
+    check_deps_btn.click(  # no inputs are passed; the handler's returned status string is shown in deps_status
+        fn=check_dependencies,
+        outputs=[deps_status]
+    )
     submit_btn.click(
         fn=process_message,
         inputs=[msg, chatbot, files, enable_reasoning, temperature, max_tokens],
+        outputs=[chatbot, reasoning_display, file_status]
     ).then(
         lambda: ("", None),
         outputs=[msg, files]
     msg.submit(
         fn=process_message,
         inputs=[msg, chatbot, files, enable_reasoning, temperature, max_tokens],
+        outputs=[chatbot, reasoning_display, file_status]
     ).then(
         lambda: ("", None),
         outputs=[msg, files]
     clear_btn.click(
         fn=clear_conversation,  # defined outside this view; presumably returns one value per output listed below — verify
+        outputs=[chatbot, reasoning_display, file_status]
     )
 # Launch the app
 if __name__ == "__main__":
     app.launch(
+        share=True  # NOTE(review): requests a public Gradio share link; likely unnecessary when hosted on Hugging Face Spaces — confirm
     )