Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,6 +7,7 @@ from typing import List, Tuple, Optional
|
|
| 7 |
import time
|
| 8 |
from PIL import Image
|
| 9 |
import io
|
|
|
|
| 10 |
|
| 11 |
# Global client variable
|
| 12 |
client = None
|
|
@@ -31,59 +32,132 @@ def encode_image(image_path: str) -> str:
|
|
| 31 |
with open(image_path, "rb") as image_file:
|
| 32 |
return base64.b64encode(image_file.read()).decode('utf-8')
|
| 33 |
|
| 34 |
-
def
|
| 35 |
-
"""Convert PDF to images using
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
try:
|
| 37 |
from pdf2image import convert_from_path
|
| 38 |
images = convert_from_path(pdf_path, dpi=200)
|
| 39 |
return images
|
| 40 |
-
except
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
try:
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
def image_to_base64(image: Image.Image, format: str = "PNG") -> str:
|
| 57 |
"""Convert PIL Image to base64"""
|
| 58 |
buffered = io.BytesIO()
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
return base64.b64encode(buffered.getvalue()).decode('utf-8')
|
| 61 |
|
| 62 |
-
def process_file(file_path: str) -> List[dict]:
|
| 63 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 64 |
file_extension = Path(file_path).suffix.lower()
|
|
|
|
| 65 |
content_blocks = []
|
|
|
|
| 66 |
|
| 67 |
try:
|
| 68 |
if file_extension == '.pdf':
|
| 69 |
# Convert PDF pages to images
|
| 70 |
-
images = pdf_to_images(file_path)
|
| 71 |
-
|
| 72 |
-
|
|
|
|
|
|
|
| 73 |
content_blocks.append({
|
| 74 |
"type": "image_url",
|
| 75 |
"image_url": {
|
| 76 |
-
"url": f"data:image/
|
| 77 |
}
|
| 78 |
})
|
|
|
|
| 79 |
elif file_extension == '.txt':
|
| 80 |
# Read text file
|
| 81 |
-
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
content_blocks.append({
|
| 84 |
"type": "text",
|
| 85 |
-
"text": f"
|
| 86 |
})
|
|
|
|
| 87 |
else:
|
| 88 |
# Handle image files
|
| 89 |
# Determine MIME type
|
|
@@ -99,20 +173,51 @@ def process_file(file_path: str) -> List[dict]:
|
|
| 99 |
elif file_extension in ['.tiff', '.tif']:
|
| 100 |
mime_type = "image/tiff"
|
| 101 |
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
except Exception as e:
|
|
|
|
| 110 |
content_blocks.append({
|
| 111 |
"type": "text",
|
| 112 |
-
"text":
|
| 113 |
})
|
|
|
|
| 114 |
|
| 115 |
-
return content_blocks
|
| 116 |
|
| 117 |
def process_message(
|
| 118 |
message: str,
|
|
@@ -121,15 +226,20 @@ def process_message(
|
|
| 121 |
enable_reasoning: bool = True,
|
| 122 |
temperature: float = 0.7,
|
| 123 |
max_tokens: int = 2000
|
| 124 |
-
) -> Tuple[List[Tuple[str, str]], str]:
|
| 125 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 126 |
global client
|
| 127 |
|
| 128 |
if client is None:
|
| 129 |
-
return history + [(message, "β Please configure your API key first in the Settings tab.")], ""
|
| 130 |
|
| 131 |
if not message.strip() and not files:
|
| 132 |
-
return history + [(message, "β οΈ Please enter a message or upload files.")], ""
|
|
|
|
|
|
|
| 133 |
|
| 134 |
try:
|
| 135 |
# Build messages array
|
|
@@ -147,17 +257,28 @@ def process_message(
|
|
| 147 |
# Process files if provided
|
| 148 |
if files:
|
| 149 |
file_count = 0
|
|
|
|
|
|
|
| 150 |
for file in files:
|
| 151 |
if file is not None:
|
| 152 |
-
file_blocks = process_file(file)
|
| 153 |
content.extend(file_blocks)
|
|
|
|
| 154 |
file_count += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
if file_count > 0:
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
})
|
| 161 |
|
| 162 |
# Add text message
|
| 163 |
if message.strip():
|
|
@@ -187,20 +308,71 @@ def process_message(
|
|
| 187 |
if enable_reasoning and hasattr(response.choices[0].message, 'reasoning_details'):
|
| 188 |
reasoning_details = response.choices[0].message.reasoning_details
|
| 189 |
if reasoning_details:
|
| 190 |
-
reasoning_text = f"
|
| 191 |
|
| 192 |
# Update history
|
| 193 |
new_history = history + [(message, assistant_message)]
|
| 194 |
|
| 195 |
-
|
|
|
|
|
|
|
|
|
|
| 196 |
|
| 197 |
except Exception as e:
|
| 198 |
error_message = f"β Error: {str(e)}"
|
| 199 |
-
return history + [(message, error_message)], ""
|
| 200 |
|
| 201 |
def clear_conversation():
|
| 202 |
"""Clear conversation history"""
|
| 203 |
-
return [], ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
|
| 205 |
# Custom CSS for premium design
|
| 206 |
custom_css = """
|
|
@@ -375,6 +547,13 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
|
|
| 375 |
elem_classes=["chatbot"]
|
| 376 |
)
|
| 377 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 378 |
with gr.Row():
|
| 379 |
msg = gr.Textbox(
|
| 380 |
label="Your Message",
|
|
@@ -454,11 +633,31 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
|
|
| 454 |
info="Maximum length of response"
|
| 455 |
)
|
| 456 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 457 |
gr.HTML("""
|
| 458 |
<div class='info-box' style='margin-top: 20px;'>
|
| 459 |
-
<strong>π¦
|
| 460 |
-
<
|
| 461 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 462 |
</div>
|
| 463 |
""")
|
| 464 |
|
|
@@ -512,7 +711,7 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
|
|
| 512 |
<p style='margin: 10px 0 0 0; color: #666; line-height: 1.6;'>
|
| 513 |
β’ Multi-page support<br>
|
| 514 |
β’ Automatic conversion to images<br>
|
| 515 |
-
β’
|
| 516 |
β’ Scanned documents<br>
|
| 517 |
β’ Forms and tables
|
| 518 |
</p>
|
|
@@ -523,138 +722,42 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
|
|
| 523 |
β’ Plain text documents<br>
|
| 524 |
β’ Code snippets<br>
|
| 525 |
β’ Notes and logs<br>
|
| 526 |
-
β’
|
| 527 |
β’ Configuration files
|
| 528 |
</p>
|
| 529 |
</div>
|
| 530 |
</div>
|
| 531 |
</div>
|
| 532 |
|
| 533 |
-
<div style='margin-top:
|
| 534 |
-
<
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
<ul style='color: #666; line-height: 2; margin: 0; padding-left: 20px;'>
|
| 541 |
-
<li><strong>Multi-page PDF analysis</strong> - Process entire documents at once</li>
|
| 542 |
-
<li><strong>Scanned documents</strong> - Extract text from scans and photos of documents</li>
|
| 543 |
-
<li><strong>Forms and tables</strong> - Understand structured data layouts</li>
|
| 544 |
-
<li><strong>Financial reports</strong> - Parse complex financial documents</li>
|
| 545 |
-
<li><strong>Receipts and invoices</strong> - Extract itemized information</li>
|
| 546 |
-
<li><strong>Academic papers</strong> - Understand scientific content and citations</li>
|
| 547 |
-
</ul>
|
| 548 |
-
</div>
|
| 549 |
-
</div>
|
| 550 |
-
|
| 551 |
-
<div class='capability-card' style='background: linear-gradient(135deg, #fff9c4 0%, #fff3e0 100%);'>
|
| 552 |
-
<h3 style='color: #f57f17;'>π€ 2. OCR Excellence (Optical Character Recognition)</h3>
|
| 553 |
-
<div style='background: white; padding: 20px; border-radius: 10px; margin-top: 15px;'>
|
| 554 |
-
<ul style='color: #666; line-height: 2; margin: 0; padding-left: 20px;'>
|
| 555 |
-
<li><strong>Handwritten text</strong> - Recognize cursive and printed handwriting</li>
|
| 556 |
-
<li><strong>Printed text</strong> - Extract text from any printed material</li>
|
| 557 |
-
<li><strong>Text in images</strong> - Find and read text embedded in photos</li>
|
| 558 |
-
<li><strong>Multi-language support</strong> - Handle various languages and scripts</li>
|
| 559 |
-
<li><strong>Low-quality images</strong> - Work with blurry or low-resolution scans</li>
|
| 560 |
-
<li><strong>Complex layouts</strong> - Handle multi-column and mixed layouts</li>
|
| 561 |
-
</ul>
|
| 562 |
-
</div>
|
| 563 |
-
</div>
|
| 564 |
-
|
| 565 |
-
<div class='capability-card' style='background: linear-gradient(135deg, #e1bee7 0%, #f3e5f5 100%);'>
|
| 566 |
-
<h3 style='color: #6a1b9a;'>π 3. Chart & Graph Analysis</h3>
|
| 567 |
-
<div style='background: white; padding: 20px; border-radius: 10px; margin-top: 15px;'>
|
| 568 |
-
<ul style='color: #666; line-height: 2; margin: 0; padding-left: 20px;'>
|
| 569 |
-
<li><strong>Bar charts</strong> - Interpret categorical data comparisons</li>
|
| 570 |
-
<li><strong>Line graphs</strong> - Analyze trends over time</li>
|
| 571 |
-
<li><strong>Pie charts</strong> - Understand proportional distributions</li>
|
| 572 |
-
<li><strong>Scatter plots</strong> - Identify correlations and patterns</li>
|
| 573 |
-
<li><strong>Complex visualizations</strong> - Parse multi-axis and combined charts</li>
|
| 574 |
-
<li><strong>Infographics</strong> - Extract insights from visual data stories</li>
|
| 575 |
-
</ul>
|
| 576 |
-
</div>
|
| 577 |
-
</div>
|
| 578 |
-
|
| 579 |
-
<div class='capability-card' style='background: linear-gradient(135deg, #b3e5fc 0%, #e1f5fe 100%);'>
|
| 580 |
-
<h3 style='color: #01579b;'>π¬ 4. Video Understanding (Frame-by-Frame)</h3>
|
| 581 |
-
<div style='background: white; padding: 20px; border-radius: 10px; margin-top: 15px;'>
|
| 582 |
-
<ul style='color: #666; line-height: 2; margin: 0; padding-left: 20px;'>
|
| 583 |
-
<li><strong>Sequential frames</strong> - Upload multiple frames from videos</li>
|
| 584 |
-
<li><strong>Action recognition</strong> - Understand what's happening across frames</li>
|
| 585 |
-
<li><strong>Temporal analysis</strong> - Track changes over time</li>
|
| 586 |
-
<li><strong>Scene understanding</strong> - Comprehend context and setting</li>
|
| 587 |
-
<li><strong>Object tracking</strong> - Follow objects across frames</li>
|
| 588 |
-
<li><strong>Event detection</strong> - Identify key moments in sequences</li>
|
| 589 |
-
</ul>
|
| 590 |
-
</div>
|
| 591 |
-
</div>
|
| 592 |
-
|
| 593 |
-
<div class='capability-card' style='background: linear-gradient(135deg, #ffccbc 0%, #ffe0b2 100%);'>
|
| 594 |
-
<h3 style='color: #bf360c;'>π 5. Multi-Image Document Processing</h3>
|
| 595 |
-
<div style='background: white; padding: 20px; border-radius: 10px; margin-top: 15px;'>
|
| 596 |
-
<ul style='color: #666; line-height: 2; margin: 0; padding-left: 20px;'>
|
| 597 |
-
<li><strong>Multiple pages at once</strong> - Upload and analyze entire documents</li>
|
| 598 |
-
<li><strong>Cross-reference</strong> - Connect information across different images</li>
|
| 599 |
-
<li><strong>Document comparison</strong> - Compare versions or similar documents</li>
|
| 600 |
-
<li><strong>Batch processing</strong> - Handle multiple documents simultaneously</li>
|
| 601 |
-
<li><strong>Presentation slides</strong> - Understand slide decks and flow</li>
|
| 602 |
-
<li><strong>Comic books/Manga</strong> - Follow visual narratives</li>
|
| 603 |
-
</ul>
|
| 604 |
-
</div>
|
| 605 |
-
</div>
|
| 606 |
-
|
| 607 |
-
<div class='capability-card' style='background: linear-gradient(135deg, #c5e1a5 0%, #dcedc8 100%);'>
|
| 608 |
-
<h3 style='color: #33691e;'>π§ 6. Advanced Reasoning</h3>
|
| 609 |
-
<div style='background: white; padding: 20px; border-radius: 10px; margin-top: 15px;'>
|
| 610 |
-
<ul style='color: #666; line-height: 2; margin: 0; padding-left: 20px;'>
|
| 611 |
-
<li><strong>Step-by-step thinking</strong> - See the model's reasoning process</li>
|
| 612 |
-
<li><strong>Mathematical problems</strong> - Solve complex math with visual elements</li>
|
| 613 |
-
<li><strong>Logical deduction</strong> - Draw conclusions from visual evidence</li>
|
| 614 |
-
<li><strong>Problem decomposition</strong> - Break down complex questions</li>
|
| 615 |
-
<li><strong>Visual reasoning</strong> - Understand spatial and logical relationships</li>
|
| 616 |
-
<li><strong>Transparent thinking</strong> - Explain how conclusions are reached</li>
|
| 617 |
-
</ul>
|
| 618 |
-
</div>
|
| 619 |
-
</div>
|
| 620 |
-
|
| 621 |
-
<div class='success-box' style='margin-top: 30px; font-size: 1.05em;'>
|
| 622 |
-
<strong>π‘ Pro Tips for Best Results:</strong><br><br>
|
| 623 |
-
β
<strong>High-quality images</strong> - Use clear, well-lit photos for better OCR<br>
|
| 624 |
-
β
<strong>Multiple angles</strong> - Upload different views for complex objects<br>
|
| 625 |
-
β
<strong>Specific questions</strong> - Ask targeted questions for precise answers<br>
|
| 626 |
-
β
<strong>Enable reasoning</strong> - Turn on reasoning mode for complex analysis<br>
|
| 627 |
-
β
<strong>Sequential order</strong> - Upload video frames in chronological order<br>
|
| 628 |
-
β
<strong>Context matters</strong> - Provide background information for better understanding
|
| 629 |
-
</div>
|
| 630 |
-
|
| 631 |
-
<div style='background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 25px; border-radius: 16px; text-align: center; margin-top: 30px;'>
|
| 632 |
-
<h3 style='margin: 0 0 10px 0; font-size: 1.5em;'>π Ready to Get Started?</h3>
|
| 633 |
-
<p style='margin: 0; font-size: 1.1em; opacity: 0.95;'>
|
| 634 |
-
Upload your files in the Chat Interface tab and experience the power of Nemotron Nano 2 VL!
|
| 635 |
-
</p>
|
| 636 |
</div>
|
| 637 |
""")
|
| 638 |
|
| 639 |
-
# Examples Tab
|
| 640 |
-
with gr.Tab("π
|
| 641 |
gr.HTML("""
|
| 642 |
<div class='capability-card'>
|
| 643 |
-
<h3>π
|
| 644 |
<p><strong>Example:</strong> "Extract all the key metrics from this financial report"</p>
|
| 645 |
-
<p>
|
| 646 |
</div>
|
| 647 |
|
| 648 |
<div class='capability-card'>
|
| 649 |
-
<h3>π€ OCR
|
| 650 |
-
<p><strong>Example:</strong> "What text appears in this
|
| 651 |
-
<p>State-of-the-art optical character recognition for any text in images.</p>
|
| 652 |
</div>
|
| 653 |
|
| 654 |
<div class='capability-card'>
|
| 655 |
-
<h3>π Chart &
|
| 656 |
-
<p><strong>Example:</strong> "
|
| 657 |
-
<p>
|
| 658 |
</div>
|
| 659 |
|
| 660 |
<div class='capability-card'>
|
|
@@ -664,26 +767,26 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
|
|
| 664 |
</div>
|
| 665 |
|
| 666 |
<div class='capability-card'>
|
| 667 |
-
<h3
|
| 668 |
-
<p><strong>Example:</strong> Upload
|
| 669 |
-
<p>Process
|
| 670 |
</div>
|
| 671 |
|
| 672 |
<div class='capability-card'>
|
| 673 |
-
<h3
|
| 674 |
-
<p><strong>Example:</strong>
|
| 675 |
-
<p>Handle
|
| 676 |
</div>
|
| 677 |
""")
|
| 678 |
|
| 679 |
gr.HTML("""
|
| 680 |
<div class='success-box' style='margin-top: 30px;'>
|
| 681 |
<strong>π‘ Pro Tips:</strong><br>
|
| 682 |
-
β’ Upload
|
| 683 |
-
β’ Enable reasoning mode for complex
|
| 684 |
-
β’
|
| 685 |
-
β’
|
| 686 |
-
β’
|
| 687 |
</div>
|
| 688 |
""")
|
| 689 |
|
|
@@ -744,10 +847,15 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
|
|
| 744 |
outputs=[api_status]
|
| 745 |
)
|
| 746 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 747 |
submit_btn.click(
|
| 748 |
fn=process_message,
|
| 749 |
inputs=[msg, chatbot, files, enable_reasoning, temperature, max_tokens],
|
| 750 |
-
outputs=[chatbot, reasoning_display]
|
| 751 |
).then(
|
| 752 |
lambda: ("", None),
|
| 753 |
outputs=[msg, files]
|
|
@@ -756,7 +864,7 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
|
|
| 756 |
msg.submit(
|
| 757 |
fn=process_message,
|
| 758 |
inputs=[msg, chatbot, files, enable_reasoning, temperature, max_tokens],
|
| 759 |
-
outputs=[chatbot, reasoning_display]
|
| 760 |
).then(
|
| 761 |
lambda: ("", None),
|
| 762 |
outputs=[msg, files]
|
|
@@ -764,11 +872,11 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
|
|
| 764 |
|
| 765 |
clear_btn.click(
|
| 766 |
fn=clear_conversation,
|
| 767 |
-
outputs=[chatbot, reasoning_display]
|
| 768 |
)
|
| 769 |
|
| 770 |
# Launch the app
|
| 771 |
if __name__ == "__main__":
|
| 772 |
app.launch(
|
| 773 |
-
share=True
|
| 774 |
)
|
|
|
|
| 7 |
import time
|
| 8 |
from PIL import Image
|
| 9 |
import io
|
| 10 |
+
import sys
|
| 11 |
|
| 12 |
# Global client variable
|
| 13 |
client = None
|
|
|
|
| 32 |
with open(image_path, "rb") as image_file:
|
| 33 |
return base64.b64encode(image_file.read()).decode('utf-8')
|
| 34 |
|
| 35 |
+
def pdf_to_images_pymupdf(pdf_path: str) -> List[Image.Image]:
    """Render every page of a PDF to a PIL image using PyMuPDF (primary method).

    PyMuPDF is imported lazily inside the function so that a missing package
    surfaces as a conversion error instead of breaking module import.

    Args:
        pdf_path: Path of the PDF file to render.

    Returns:
        One RGB image per page, in page order, rendered at 2x scale.

    Raises:
        Exception: With a "PyMuPDF error: ..." message when the import,
            opening the document, or rendering a page fails. The underlying
            exception is attached as ``__cause__``.
    """
    try:
        import fitz  # PyMuPDF

        doc = fitz.open(pdf_path)
        try:
            # Render at 2x resolution for better quality
            zoom = fitz.Matrix(2, 2)
            images = []
            for page in doc:
                # alpha=False keeps the samples at 3 bytes per pixel, which is
                # what the "RGB" raw decoder below expects.
                pix = page.get_pixmap(matrix=zoom, alpha=False)
                images.append(
                    Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
                )
            return images
        finally:
            # Always release the document, even if a page fails to render.
            doc.close()
    except Exception as e:
        raise Exception(f"PyMuPDF error: {str(e)}") from e
|
| 56 |
+
|
| 57 |
+
def pdf_to_images_pdf2image(pdf_path: str) -> List[Image.Image]:
    """Convert a PDF to PIL images using pdf2image (requires poppler).

    pdf2image is imported lazily inside the function so that a missing
    package surfaces as a conversion error instead of breaking module import.

    Args:
        pdf_path: Path of the PDF file to convert.

    Returns:
        The images produced by ``convert_from_path`` at 200 DPI.

    Raises:
        Exception: With a "pdf2image error: ..." message when the import or
            the conversion fails. The underlying exception is attached as
            ``__cause__``.
    """
    try:
        from pdf2image import convert_from_path

        return convert_from_path(pdf_path, dpi=200)
    except Exception as e:
        raise Exception(f"pdf2image error: {str(e)}") from e
|
| 65 |
+
|
| 66 |
+
def pdf_to_images(pdf_path: str) -> Tuple[List[Image.Image], str]:
    """Convert a PDF to images, trying PyMuPDF first and pdf2image second.

    Args:
        pdf_path: Path of the PDF file to convert.

    Returns:
        A ``(images, method)`` tuple where ``method`` is ``"PyMuPDF"`` or
        ``"pdf2image"``, naming the backend that succeeded.

    Raises:
        Exception: When both backends fail. The message lists each backend's
            error followed by installation instructions; the fallback
            backend's exception is attached as ``__cause__``.
    """
    # Try PyMuPDF first (doesn't require poppler)
    try:
        return pdf_to_images_pymupdf(pdf_path), "PyMuPDF"
    except Exception as e1:
        pymupdf_error = str(e1)

    # Try pdf2image as fallback
    try:
        return pdf_to_images_pdf2image(pdf_path), "pdf2image"
    except Exception as e2:
        pdf2image_error = str(e2)
        # The except-clause name is unbound when the clause ends, so keep a
        # reference for exception chaining below.
        fallback_failure = e2

    # Both methods failed
    error_msg = f"""PDF conversion failed. Tried multiple methods:

1. PyMuPDF: {pymupdf_error}
2. pdf2image: {pdf2image_error}

SOLUTION:
Install PyMuPDF (recommended - no external dependencies):
pip install PyMuPDF

OR install pdf2image + poppler:
pip install pdf2image

Then install poppler:
- Ubuntu/Debian: sudo apt-get install poppler-utils
- macOS: brew install poppler
- Windows: Download from https://github.com/oschwartz10612/poppler-windows/releases/
"""
    raise Exception(error_msg) from fallback_failure
|
| 104 |
|
| 105 |
def image_to_base64(image: Image.Image, format: str = "PNG") -> str:
    """Encode a PIL image as a base64 string.

    The image is flattened to RGB before saving. Any mode that carries
    transparency (RGBA, LA, PA, or a palette image with a transparency entry)
    is composited onto a white background so transparent areas do not turn
    into arbitrary colours; every other non-RGB mode is converted directly.

    Args:
        image: The image to encode.
        format: Pillow format name passed to ``Image.save`` (e.g. "PNG",
            "JPEG").

    Returns:
        The saved image bytes, base64-encoded and decoded as UTF-8 text.
    """
    has_alpha = image.mode in ("RGBA", "LA", "PA") or (
        image.mode == "P" and "transparency" in image.info
    )
    if has_alpha:
        # Normalise to RGBA so a single alpha band can serve as the paste mask.
        rgba = image.convert("RGBA")
        background = Image.new("RGB", rgba.size, (255, 255, 255))
        background.paste(rgba, mask=rgba.getchannel("A"))
        image = background
    elif image.mode != "RGB":
        image = image.convert("RGB")

    buffered = io.BytesIO()
    # quality only affects lossy formats such as JPEG.
    image.save(buffered, format=format, quality=95)
    return base64.b64encode(buffered.getvalue()).decode('utf-8')
|
| 119 |
|
| 120 |
+
def process_file(file_path: str) -> Tuple[List[dict], str]:
|
| 121 |
+
"""
|
| 122 |
+
Process a file and return content blocks for API
|
| 123 |
+
Returns: (content_blocks, status_message)
|
| 124 |
+
"""
|
| 125 |
file_extension = Path(file_path).suffix.lower()
|
| 126 |
+
file_name = Path(file_path).name
|
| 127 |
content_blocks = []
|
| 128 |
+
status_message = ""
|
| 129 |
|
| 130 |
try:
|
| 131 |
if file_extension == '.pdf':
|
| 132 |
# Convert PDF pages to images
|
| 133 |
+
images, method = pdf_to_images(file_path)
|
| 134 |
+
status_message = f"β
PDF '{file_name}' converted to {len(images)} page(s) using {method}"
|
| 135 |
+
|
| 136 |
+
for idx, img in enumerate(images, 1):
|
| 137 |
+
base64_image = image_to_base64(img, format="JPEG")
|
| 138 |
content_blocks.append({
|
| 139 |
"type": "image_url",
|
| 140 |
"image_url": {
|
| 141 |
+
"url": f"data:image/jpeg;base64,{base64_image}"
|
| 142 |
}
|
| 143 |
})
|
| 144 |
+
|
| 145 |
elif file_extension == '.txt':
|
| 146 |
# Read text file
|
| 147 |
+
try:
|
| 148 |
+
with open(file_path, 'r', encoding='utf-8') as f:
|
| 149 |
+
text_content = f.read()
|
| 150 |
+
except UnicodeDecodeError:
|
| 151 |
+
# Try with different encoding
|
| 152 |
+
with open(file_path, 'r', encoding='latin-1') as f:
|
| 153 |
+
text_content = f.read()
|
| 154 |
+
|
| 155 |
+
status_message = f"β
Text file '{file_name}' loaded ({len(text_content)} characters)"
|
| 156 |
content_blocks.append({
|
| 157 |
"type": "text",
|
| 158 |
+
"text": f"π Content from '{file_name}':\n\n{text_content}"
|
| 159 |
})
|
| 160 |
+
|
| 161 |
else:
|
| 162 |
# Handle image files
|
| 163 |
# Determine MIME type
|
|
|
|
| 173 |
elif file_extension in ['.tiff', '.tif']:
|
| 174 |
mime_type = "image/tiff"
|
| 175 |
|
| 176 |
+
# Load and potentially convert the image
|
| 177 |
+
try:
|
| 178 |
+
img = Image.open(file_path)
|
| 179 |
+
|
| 180 |
+
# Convert to RGB if necessary
|
| 181 |
+
if img.mode in ('RGBA', 'LA', 'P'):
|
| 182 |
+
background = Image.new('RGB', img.size, (255, 255, 255))
|
| 183 |
+
if img.mode == 'P':
|
| 184 |
+
img = img.convert('RGBA')
|
| 185 |
+
if img.mode in ('RGBA', 'LA'):
|
| 186 |
+
background.paste(img, mask=img.split()[-1] if img.mode in ('RGBA', 'LA') else None)
|
| 187 |
+
img = background
|
| 188 |
+
elif img.mode != 'RGB':
|
| 189 |
+
img = img.convert('RGB')
|
| 190 |
+
|
| 191 |
+
# Convert to base64
|
| 192 |
+
base64_image = image_to_base64(img, format="JPEG")
|
| 193 |
+
|
| 194 |
+
status_message = f"β
Image '{file_name}' loaded ({img.width}x{img.height})"
|
| 195 |
+
content_blocks.append({
|
| 196 |
+
"type": "image_url",
|
| 197 |
+
"image_url": {
|
| 198 |
+
"url": f"data:image/jpeg;base64,{base64_image}"
|
| 199 |
+
}
|
| 200 |
+
})
|
| 201 |
+
except Exception as img_error:
|
| 202 |
+
# If image processing fails, try direct base64 encoding
|
| 203 |
+
base64_image = encode_image(file_path)
|
| 204 |
+
status_message = f"β
Image '{file_name}' loaded (direct encoding)"
|
| 205 |
+
content_blocks.append({
|
| 206 |
+
"type": "image_url",
|
| 207 |
+
"image_url": {
|
| 208 |
+
"url": f"data:{mime_type};base64,{base64_image}"
|
| 209 |
+
}
|
| 210 |
+
})
|
| 211 |
+
|
| 212 |
except Exception as e:
|
| 213 |
+
error_msg = f"β Error processing '{file_name}': {str(e)}"
|
| 214 |
content_blocks.append({
|
| 215 |
"type": "text",
|
| 216 |
+
"text": error_msg
|
| 217 |
})
|
| 218 |
+
status_message = error_msg
|
| 219 |
|
| 220 |
+
return content_blocks, status_message
|
| 221 |
|
| 222 |
def process_message(
|
| 223 |
message: str,
|
|
|
|
| 226 |
enable_reasoning: bool = True,
|
| 227 |
temperature: float = 0.7,
|
| 228 |
max_tokens: int = 2000
|
| 229 |
+
) -> Tuple[List[Tuple[str, str]], str, str]:
|
| 230 |
+
"""
|
| 231 |
+
Process user message and generate response
|
| 232 |
+
Returns: (updated_history, reasoning_text, status_message)
|
| 233 |
+
"""
|
| 234 |
global client
|
| 235 |
|
| 236 |
if client is None:
|
| 237 |
+
return history + [(message, "β Please configure your API key first in the Settings tab.")], "", ""
|
| 238 |
|
| 239 |
if not message.strip() and not files:
|
| 240 |
+
return history + [(message, "β οΈ Please enter a message or upload files.")], "", ""
|
| 241 |
+
|
| 242 |
+
status_messages = []
|
| 243 |
|
| 244 |
try:
|
| 245 |
# Build messages array
|
|
|
|
| 257 |
# Process files if provided
|
| 258 |
if files:
|
| 259 |
file_count = 0
|
| 260 |
+
total_pages = 0
|
| 261 |
+
|
| 262 |
for file in files:
|
| 263 |
if file is not None:
|
| 264 |
+
file_blocks, status = process_file(file)
|
| 265 |
content.extend(file_blocks)
|
| 266 |
+
status_messages.append(status)
|
| 267 |
file_count += 1
|
| 268 |
+
|
| 269 |
+
# Count pages for PDFs
|
| 270 |
+
if status.startswith("β
") and "page(s)" in status:
|
| 271 |
+
try:
|
| 272 |
+
pages = int(status.split("converted to ")[1].split(" page(s)")[0])
|
| 273 |
+
total_pages += pages
|
| 274 |
+
except:
|
| 275 |
+
pass
|
| 276 |
|
| 277 |
if file_count > 0:
|
| 278 |
+
file_summary = f"π {file_count} file(s) uploaded"
|
| 279 |
+
if total_pages > 0:
|
| 280 |
+
file_summary += f" ({total_pages} PDF pages)"
|
| 281 |
+
content.insert(0, {"type": "text", "text": file_summary})
|
| 282 |
|
| 283 |
# Add text message
|
| 284 |
if message.strip():
|
|
|
|
| 308 |
if enable_reasoning and hasattr(response.choices[0].message, 'reasoning_details'):
|
| 309 |
reasoning_details = response.choices[0].message.reasoning_details
|
| 310 |
if reasoning_details:
|
| 311 |
+
reasoning_text = f"**π§ Reasoning Process:**\n{json.dumps(reasoning_details, indent=2)}"
|
| 312 |
|
| 313 |
# Update history
|
| 314 |
new_history = history + [(message, assistant_message)]
|
| 315 |
|
| 316 |
+
# Combine status messages
|
| 317 |
+
combined_status = "\n".join(status_messages) if status_messages else "β
Message processed successfully"
|
| 318 |
+
|
| 319 |
+
return new_history, reasoning_text, combined_status
|
| 320 |
|
| 321 |
except Exception as e:
|
| 322 |
error_message = f"β Error: {str(e)}"
|
| 323 |
+
return history + [(message, error_message)], "", error_message
|
| 324 |
|
| 325 |
def clear_conversation() -> Tuple[list, str, str]:
    """Clear conversation history.

    Returns:
        An empty history list plus two empty strings. The clear button wires
        these to the chatbot, reasoning display and file status outputs, in
        that order.
    """
    return [], "", ""
|
| 328 |
+
|
| 329 |
+
def check_dependencies() -> str:
    """Check which PDF processing libraries are available.

    Probes PyMuPDF, pdf2image and Pillow by attempting to import them. When
    pdf2image is importable, also looks for the poppler command-line tools
    on PATH.

    Returns:
        A Markdown-formatted status report.
    """
    import importlib
    import shutil

    def _can_import(module_name: str) -> bool:
        """Return True if *module_name* imports without ImportError."""
        try:
            importlib.import_module(module_name)
        except ImportError:
            return False
        return True

    parts = ["**📦 PDF Processing Dependencies Status:**\n\n"]

    # Check PyMuPDF
    if _can_import("fitz"):
        parts.append("✅ **PyMuPDF (fitz)**: Installed and ready!\n")
        parts.append(" - No external dependencies needed\n")
        parts.append(" - This is the primary PDF processing method\n\n")
    else:
        parts.append("❌ **PyMuPDF (fitz)**: Not installed\n")
        parts.append(" - Install: `pip install PyMuPDF`\n\n")

    # Check pdf2image
    if _can_import("pdf2image"):
        parts.append("✅ **pdf2image**: Installed\n")
        parts.append(" - Requires poppler-utils (external)\n")

        # Probe for the poppler executables instead of only importing
        # pdf2image symbols, which cannot detect a missing poppler.
        if shutil.which("pdftoppm") and shutil.which("pdfinfo"):
            parts.append(" - poppler-utils found on PATH\n")
        else:
            parts.append(" - ⚠️ poppler-utils may not be installed\n")

        parts.append("\n")
    else:
        parts.append("⚠️ **pdf2image**: Not installed (optional fallback)\n")
        parts.append(" - Install: `pip install pdf2image`\n\n")

    # Check PIL/Pillow
    if _can_import("PIL.Image"):
        parts.append("✅ **Pillow (PIL)**: Installed and ready!\n\n")
    else:
        parts.append("❌ **Pillow (PIL)**: Not installed\n")
        parts.append(" - Install: `pip install Pillow`\n\n")

    parts.append("**💡 Recommendation:**\n")
    parts.append("Install PyMuPDF for the best PDF support:\n")
    parts.append("`pip install PyMuPDF Pillow`")

    return "".join(parts)
|
| 376 |
|
| 377 |
# Custom CSS for premium design
|
| 378 |
custom_css = """
|
|
|
|
| 547 |
elem_classes=["chatbot"]
|
| 548 |
)
|
| 549 |
|
| 550 |
+
file_status = gr.Textbox(
|
| 551 |
+
label="π File Processing Status",
|
| 552 |
+
lines=2,
|
| 553 |
+
interactive=False,
|
| 554 |
+
visible=True
|
| 555 |
+
)
|
| 556 |
+
|
| 557 |
with gr.Row():
|
| 558 |
msg = gr.Textbox(
|
| 559 |
label="Your Message",
|
|
|
|
| 633 |
info="Maximum length of response"
|
| 634 |
)
|
| 635 |
|
| 636 |
+
gr.HTML("<hr style='margin: 30px 0; border: none; border-top: 2px solid #e0e7ff;'>")
|
| 637 |
+
|
| 638 |
+
gr.HTML("""
|
| 639 |
+
<div class='info-box'>
|
| 640 |
+
<strong>π¦ Check Dependencies</strong><br>
|
| 641 |
+
Verify that PDF processing libraries are installed
|
| 642 |
+
</div>
|
| 643 |
+
""")
|
| 644 |
+
|
| 645 |
+
check_deps_btn = gr.Button("π Check Dependencies", variant="secondary", elem_classes=["secondary"])
|
| 646 |
+
deps_status = gr.Markdown(label="Dependency Status")
|
| 647 |
+
|
| 648 |
gr.HTML("""
|
| 649 |
<div class='info-box' style='margin-top: 20px;'>
|
| 650 |
+
<strong>π¦ Installation Guide:</strong><br><br>
|
| 651 |
+
<strong>Recommended (PyMuPDF - No external dependencies):</strong><br>
|
| 652 |
+
<code>pip install PyMuPDF Pillow openai gradio</code><br><br>
|
| 653 |
+
|
| 654 |
+
<strong>Alternative (pdf2image - Requires poppler):</strong><br>
|
| 655 |
+
<code>pip install pdf2image Pillow openai gradio</code><br><br>
|
| 656 |
+
|
| 657 |
+
<strong>Poppler installation (for pdf2image):</strong><br>
|
| 658 |
+
β’ Ubuntu/Debian: <code>sudo apt-get install poppler-utils</code><br>
|
| 659 |
+
β’ macOS: <code>brew install poppler</code><br>
|
| 660 |
+
β’ Windows: Download from <a href="https://github.com/oschwartz10612/poppler-windows/releases/" target="_blank">GitHub</a>
|
| 661 |
</div>
|
| 662 |
""")
|
| 663 |
|
|
|
|
| 711 |
<p style='margin: 10px 0 0 0; color: #666; line-height: 1.6;'>
|
| 712 |
β’ Multi-page support<br>
|
| 713 |
β’ Automatic conversion to images<br>
|
| 714 |
+
β’ PyMuPDF (recommended)<br>
|
| 715 |
β’ Scanned documents<br>
|
| 716 |
β’ Forms and tables
|
| 717 |
</p>
|
|
|
|
| 722 |
β’ Plain text documents<br>
|
| 723 |
β’ Code snippets<br>
|
| 724 |
β’ Notes and logs<br>
|
| 725 |
+
β’ UTF-8 encoding<br>
|
| 726 |
β’ Configuration files
|
| 727 |
</p>
|
| 728 |
</div>
|
| 729 |
</div>
|
| 730 |
</div>
|
| 731 |
|
| 732 |
+
<div class='success-box' style='margin-top: 20px;'>
|
| 733 |
+
<strong>π PDF Processing:</strong><br>
|
| 734 |
+
This app uses <strong>PyMuPDF (fitz)</strong> as the primary method for PDF conversion.<br>
|
| 735 |
+
β’ β
No external dependencies (no poppler needed)<br>
|
| 736 |
+
β’ β
Fast and reliable<br>
|
| 737 |
+
β’ β
Automatic fallback to pdf2image if needed<br>
|
| 738 |
+
β’ β
Clear error messages with installation instructions
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 739 |
</div>
|
| 740 |
""")
|
| 741 |
|
| 742 |
+
# Examples Tab
|
| 743 |
+
with gr.Tab("π Use Cases", elem_classes=["tab-nav"]):
|
| 744 |
gr.HTML("""
|
| 745 |
<div class='capability-card'>
|
| 746 |
+
<h3>π Financial Report Analysis</h3>
|
| 747 |
<p><strong>Example:</strong> "Extract all the key metrics from this financial report"</p>
|
| 748 |
+
<p><strong>What it extracts:</strong> Revenue, Net Profit, EBITDA, Cash Flow, Assets, Liabilities, Ratios, YoY Growth</p>
|
| 749 |
</div>
|
| 750 |
|
| 751 |
<div class='capability-card'>
|
| 752 |
+
<h3>π€ OCR & Text Extraction</h3>
|
| 753 |
+
<p><strong>Example:</strong> "What text appears in this scanned document?"</p>
|
| 754 |
+
<p>State-of-the-art optical character recognition for any text in images or PDFs.</p>
|
| 755 |
</div>
|
| 756 |
|
| 757 |
<div class='capability-card'>
|
| 758 |
+
<h3>π Chart & Data Visualization</h3>
|
| 759 |
+
<p><strong>Example:</strong> "Analyze the trends in these charts"</p>
|
| 760 |
+
<p>Understand bar charts, line graphs, pie charts, scatter plots, and complex visualizations.</p>
|
| 761 |
</div>
|
| 762 |
|
| 763 |
<div class='capability-card'>
|
|
|
|
| 767 |
</div>
|
| 768 |
|
| 769 |
<div class='capability-card'>
|
| 770 |
+
<h3>π Multi-Page Documents</h3>
|
| 771 |
+
<p><strong>Example:</strong> Upload a PDF and ask "Summarize the key points from all pages"</p>
|
| 772 |
+
<p>Process entire documents with multiple pages simultaneously.</p>
|
| 773 |
</div>
|
| 774 |
|
| 775 |
<div class='capability-card'>
|
| 776 |
+
<h3>π’ Business Document Processing</h3>
|
| 777 |
+
<p><strong>Example:</strong> "Extract information from this invoice/receipt/form"</p>
|
| 778 |
+
<p>Handle invoices, receipts, forms, contracts, and structured business documents.</p>
|
| 779 |
</div>
|
| 780 |
""")
|
| 781 |
|
| 782 |
gr.HTML("""
|
| 783 |
<div class='success-box' style='margin-top: 30px;'>
|
| 784 |
<strong>π‘ Pro Tips:</strong><br>
|
| 785 |
+
β’ Upload high-quality scans for best OCR results<br>
|
| 786 |
+
β’ Enable reasoning mode for complex financial analysis<br>
|
| 787 |
+
β’ Ask specific questions to get targeted information<br>
|
| 788 |
+
β’ Upload multiple related documents for comparison<br>
|
| 789 |
+
β’ Use clear, descriptive questions for better answers
|
| 790 |
</div>
|
| 791 |
""")
|
| 792 |
|
|
|
|
| 847 |
outputs=[api_status]
|
| 848 |
)
|
| 849 |
|
| 850 |
+
check_deps_btn.click(
|
| 851 |
+
fn=check_dependencies,
|
| 852 |
+
outputs=[deps_status]
|
| 853 |
+
)
|
| 854 |
+
|
| 855 |
submit_btn.click(
|
| 856 |
fn=process_message,
|
| 857 |
inputs=[msg, chatbot, files, enable_reasoning, temperature, max_tokens],
|
| 858 |
+
outputs=[chatbot, reasoning_display, file_status]
|
| 859 |
).then(
|
| 860 |
lambda: ("", None),
|
| 861 |
outputs=[msg, files]
|
|
|
|
| 864 |
msg.submit(
|
| 865 |
fn=process_message,
|
| 866 |
inputs=[msg, chatbot, files, enable_reasoning, temperature, max_tokens],
|
| 867 |
+
outputs=[chatbot, reasoning_display, file_status]
|
| 868 |
).then(
|
| 869 |
lambda: ("", None),
|
| 870 |
outputs=[msg, files]
|
|
|
|
| 872 |
|
| 873 |
clear_btn.click(
|
| 874 |
fn=clear_conversation,
|
| 875 |
+
outputs=[chatbot, reasoning_display, file_status]
|
| 876 |
)
|
| 877 |
|
| 878 |
# Launch the app
|
| 879 |
if __name__ == "__main__":
|
| 880 |
app.launch(
|
| 881 |
+
share=True
|
| 882 |
)
|