Spaces:
Sleeping
Sleeping
Commit
·
3fd9d26
1
Parent(s):
f403b46
Initial commit
Browse files- .gitattributes +3 -0
- .gitignore +4 -0
- README.md +97 -8
- app.py +381 -0
- packages.txt +2 -0
- requirements.txt +10 -0
- sample_images/chartqa_sample1.jpeg +3 -0
- sample_images/docvqa_sample1.png +3 -0
- sample_images/docvqa_sample2.png +3 -0
- sample_images/infovqa_sample1.jpeg +3 -0
- sample_images/textvqa_sample1.jpg +3 -0
- sample_images/vqav2_sample1.png +3 -0
- samples.json +44 -0
- src/__init__.py +1 -0
- src/dam_models.py +260 -0
- static/aivn_logo.png +3 -0
- static/vlai_logo.png +3 -0
- vlai_template.py +240 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
*.jpeg filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
__MACOSX/
|
| 3 |
+
|
| 4 |
+
.DS_Store
|
README.md
CHANGED
|
@@ -1,12 +1,101 @@
|
|
| 1 |
---
|
| 2 |
-
title: DAM-QA Demo
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom: blue
|
| 5 |
-
colorTo:
|
| 6 |
-
sdk: gradio
|
| 7 |
-
sdk_version: 5.
|
| 8 |
-
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: "DAM vs DAM-QA Comparison Demo"
|
| 3 |
+
emoji: "🤖"
|
| 4 |
+
colorFrom: "blue"
|
| 5 |
+
colorTo: "red"
|
| 6 |
+
sdk: "gradio"
|
| 7 |
+
sdk_version: "5.38.0"
|
| 8 |
+
app_file: "app.py"
|
| 9 |
pinned: false
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# 🤖 DAM vs DAM-QA Visual Question Answering Demo
|
| 13 |
+
|
| 14 |
+
An interactive demo that compares DAM (Original) and DAM-QA (Sliding Window) models on Visual Question Answering tasks for text-rich images.
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
## 🚀 Quick Start
|
| 18 |
+
|
| 19 |
+
### Local Installation
|
| 20 |
+
```bash
|
| 21 |
+
git clone <repository-url>
|
| 22 |
+
cd DAM-QA-Demo
|
| 23 |
+
pip install -r requirements.txt
|
| 24 |
+
python app.py
|
| 25 |
+
```
|
| 26 |
+
|
| 27 |
+
### Usage
|
| 28 |
+
1. **Ensure GPU**: Models require CUDA-compatible GPU with 8GB+ memory
|
| 29 |
+
2. Launch the app: `python app.py`
|
| 30 |
+
3. Wait for models to load (status will update automatically)
|
| 31 |
+
4. Choose a sample from dropdown OR upload your own image
|
| 32 |
+
5. Enter a question about the image (or use auto-filled sample question)
|
| 33 |
+
6. Click "Compare Models" to see both DAM Original and DAM-QA results
|
| 34 |
+
7. Analyze the detailed voting breakdown for DAM-QA's sliding window approach
|
| 35 |
+
|
| 36 |
+
### ⚠️ Hardware Requirements
|
| 37 |
+
- **GPU**: CUDA-compatible with 8GB+ VRAM recommended
|
| 38 |
+
- **CPU**: Multi-core processor for fallback (much slower)
|
| 39 |
+
- **RAM**: 16GB+ system memory recommended
|
| 40 |
+
|
| 41 |
+
## 🧠 Technical Highlights
|
| 42 |
+
|
| 43 |
+
- **DAM Original**: Uses the full image with NVIDIA's DAM-3B-Self-Contained model
|
| 44 |
+
- **DAM-QA Sliding Window**: Implements sliding window approach with weighted voting aggregation
|
| 45 |
+
- **Model Architecture**: Transformer-based visual language model with attention mechanisms
|
| 46 |
+
- **Inference**: Supports both GPU and CPU inference with automatic device selection
|
| 47 |
+
- **UI Framework**: Built with Gradio and custom VLAI template for professional presentation
|
| 48 |
+
|
| 49 |
+
## 📋 Requirements
|
| 50 |
+
|
| 51 |
+
- Python 3.10+
|
| 52 |
+
- PyTorch 2.0+
|
| 53 |
+
- Transformers 4.30+
|
| 54 |
+
- Gradio 5.38+
|
| 55 |
+
- CUDA-compatible GPU (recommended)
|
| 56 |
+
- 8GB+ GPU memory for optimal performance
|
| 57 |
+
|
| 58 |
+
## 🎨 Theming & Branding
|
| 59 |
+
|
| 60 |
+
The UI is powered by `vlai_template.py` and can be customized programmatically:
|
| 61 |
+
|
| 62 |
+
```python
|
| 63 |
+
import vlai_template as vt
|
| 64 |
+
|
| 65 |
+
vt.configure(
|
| 66 |
+
project_name="DAM vs DAM-QA Comparison Demo",
|
| 67 |
+
year="2025",
|
| 68 |
+
module="DAM",
|
| 69 |
+
description=(
|
| 70 |
+
"Compare DAM (Original) and DAM-QA (Sliding Window) performance "
|
| 71 |
+
"on Visual Question Answering tasks"
|
| 72 |
+
),
|
| 73 |
+
colors={
|
| 74 |
+
"primary": "#0F6CBD",
|
| 75 |
+
"accent": "#C4314B",
|
| 76 |
+
"bg1": "#F0F7FF",
|
| 77 |
+
"bg2": "#E8F0FA",
|
| 78 |
+
"bg3": "#DDE7F8",
|
| 79 |
+
},
|
| 80 |
+
font_family=(
|
| 81 |
+
"'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, "
|
| 82 |
+
"'Helvetica Neue', Arial, 'Noto Sans', 'Liberation Sans', sans-serif"
|
| 83 |
+
),
|
| 84 |
+
meta_items=[
|
| 85 |
+
("Original DAM", "Full image processing"),
|
| 86 |
+
("DAM-QA", "Sliding window + voting"),
|
| 87 |
+
("Datasets", "DocVQA, InfographicVQA, TextVQA, ChartQA, VQAv2"),
|
| 88 |
+
],
|
| 89 |
+
)
|
| 90 |
+
```
|
| 91 |
+
|
| 92 |
+
## 📊 Datasets Used
|
| 93 |
+
|
| 94 |
+
This demo includes sample images and questions from:
|
| 95 |
+
|
| 96 |
+
- **DocVQA**: Document visual question answering
|
| 97 |
+
- **InfographicVQA**: Infographic-based questions
|
| 98 |
+
- **TextVQA**: Scene text visual question answering
|
| 99 |
+
- **ChartQA**: Chart and graph question answering
|
| 100 |
+
- **VQAv2**: General visual question answering
|
| 101 |
+
|
app.py
ADDED
|
@@ -0,0 +1,381 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import gradio as gr
|
| 4 |
+
import plotly.graph_objects as go
|
| 5 |
+
import pandas as pd
|
| 6 |
+
import time
|
| 7 |
+
from PIL import Image
|
| 8 |
+
import vlai_template
|
| 9 |
+
|
| 10 |
+
from src.dam_models import get_dam_original, get_dam_sliding
|
| 11 |
+
|
| 12 |
+
# App configuration
|
| 13 |
+
vlai_template.set_meta(
|
| 14 |
+
project_name="DAM-QA Demo",
|
| 15 |
+
year="2025",
|
| 16 |
+
module="DAM",
|
| 17 |
+
description="DAM-QA performance on Visual Question Answering tasks",
|
| 18 |
+
meta_items=[
|
| 19 |
+
("Original DAM", "Full image processing"),
|
| 20 |
+
("DAM-QA", "Sliding window + voting"),
|
| 21 |
+
("Datasets", "DocVQA, InfographicVQA, TextVQA, ChartQA, VQAv2"),
|
| 22 |
+
],
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
# Global state for models
|
| 26 |
+
STATE = {
|
| 27 |
+
"dam_original": None,
|
| 28 |
+
"dam_sliding": None,
|
| 29 |
+
"samples": []
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
# Load sample data
|
| 33 |
+
def load_samples():
|
| 34 |
+
"""Load sample questions and images."""
|
| 35 |
+
try:
|
| 36 |
+
with open("samples.json", "r") as f:
|
| 37 |
+
samples = json.load(f)
|
| 38 |
+
STATE["samples"] = samples
|
| 39 |
+
return samples
|
| 40 |
+
except Exception as e:
|
| 41 |
+
print(f"Error loading samples: {e}")
|
| 42 |
+
return []
|
| 43 |
+
|
| 44 |
+
def init_models():
|
| 45 |
+
"""Initialize both DAM models."""
|
| 46 |
+
try:
|
| 47 |
+
STATE["dam_original"] = get_dam_original()
|
| 48 |
+
STATE["dam_sliding"] = get_dam_sliding()
|
| 49 |
+
return "✅ Both DAM models loaded successfully!"
|
| 50 |
+
except Exception as e:
|
| 51 |
+
error_msg = f"❌ Error loading models: {str(e)}"
|
| 52 |
+
print(error_msg)
|
| 53 |
+
return error_msg
|
| 54 |
+
|
| 55 |
+
def get_sample_choices():
|
| 56 |
+
"""Get list of sample choices for dropdown."""
|
| 57 |
+
samples = STATE["samples"]
|
| 58 |
+
choices = []
|
| 59 |
+
for i, sample in enumerate(samples):
|
| 60 |
+
label = f"{sample['dataset']}: {sample['question'][:50]}..."
|
| 61 |
+
choices.append((label, i))
|
| 62 |
+
return choices
|
| 63 |
+
|
| 64 |
+
def fill_from_sample(sample_idx):
|
| 65 |
+
"""Fill inputs from selected sample."""
|
| 66 |
+
if not STATE["samples"] or sample_idx is None or sample_idx >= len(STATE["samples"]):
|
| 67 |
+
return None, "", "", None, ""
|
| 68 |
+
|
| 69 |
+
sample = STATE["samples"][sample_idx]
|
| 70 |
+
# Load the sample image
|
| 71 |
+
try:
|
| 72 |
+
sample_img = Image.open(sample["image"])
|
| 73 |
+
return (
|
| 74 |
+
sample_img, # sample_image_display
|
| 75 |
+
sample["ground_truth"], # ground_truth_display
|
| 76 |
+
f"Dataset: {sample['dataset']}\nDescription: {sample['description']}", # sample_info_display
|
| 77 |
+
sample_img, # image_input (copy to main input)
|
| 78 |
+
sample["question"] # question_input (copy to main input)
|
| 79 |
+
)
|
| 80 |
+
except Exception as e:
|
| 81 |
+
print(f"Error loading sample image {sample['image']}: {e}")
|
| 82 |
+
return None, sample["ground_truth"], f"Error loading image: {e}", None, sample["question"]
|
| 83 |
+
|
| 84 |
+
def compare_models(image, question, max_tokens):
|
| 85 |
+
"""Compare both models on the same input."""
|
| 86 |
+
if STATE["dam_original"] is None or STATE["dam_sliding"] is None:
|
| 87 |
+
return "❌ Models not loaded. Please wait for models to initialize.", "", "", None, ""
|
| 88 |
+
|
| 89 |
+
if image is None:
|
| 90 |
+
return "❌ Please provide an image", "", "", None, ""
|
| 91 |
+
|
| 92 |
+
if not question or not question.strip():
|
| 93 |
+
return "❌ Please provide a question", "", "", None, ""
|
| 94 |
+
|
| 95 |
+
try:
|
| 96 |
+
# Convert to PIL Image if needed
|
| 97 |
+
if isinstance(image, str):
|
| 98 |
+
img = Image.open(image)
|
| 99 |
+
elif hasattr(image, 'save'): # PIL Image
|
| 100 |
+
img = image
|
| 101 |
+
else:
|
| 102 |
+
return "❌ Invalid image format", "", "", None, ""
|
| 103 |
+
|
| 104 |
+
# DAM Original prediction
|
| 105 |
+
original_answer, original_time = STATE["dam_original"].predict(
|
| 106 |
+
img, question, max_tokens
|
| 107 |
+
)
|
| 108 |
+
|
| 109 |
+
# DAM Sliding Window prediction
|
| 110 |
+
sliding_answer, sliding_time, voting_details = STATE["dam_sliding"].predict(
|
| 111 |
+
img, question, max_tokens
|
| 112 |
+
)
|
| 113 |
+
|
| 114 |
+
# Format results
|
| 115 |
+
original_result = f"""
|
| 116 |
+
### 🔍 DAM Original (Full Image)
|
| 117 |
+
**Answer:** {original_answer}
|
| 118 |
+
**Inference Time:** {original_time:.2f}s
|
| 119 |
+
**Method:** Processes the entire image at once
|
| 120 |
+
"""
|
| 121 |
+
|
| 122 |
+
sliding_result = f"""
|
| 123 |
+
### 🧩 DAM-QA (Sliding Window + Voting)
|
| 124 |
+
**Answer:** {sliding_answer}
|
| 125 |
+
**Inference Time:** {sliding_time:.2f}s
|
| 126 |
+
**Method:** Sliding windows with weighted voting
|
| 127 |
+
**Total Windows:** {voting_details.get('total_windows', 'N/A')}
|
| 128 |
+
"""
|
| 129 |
+
|
| 130 |
+
# Create comparison summary
|
| 131 |
+
comparison = f"""
|
| 132 |
+
## 📊 Comparison Summary
|
| 133 |
+
|
| 134 |
+
| Method | Answer | Time (s) | Approach |
|
| 135 |
+
|--------|--------|----------|----------|
|
| 136 |
+
| DAM Original | {original_answer} | {original_time:.2f} | Full image |
|
| 137 |
+
| DAM-QA Sliding | {sliding_answer} | {sliding_time:.2f} | Window + voting |
|
| 138 |
+
|
| 139 |
+
**Speed Difference:** {abs(original_time - sliding_time):.2f}s
|
| 140 |
+
**Faster Method:** {'DAM Original' if original_time < sliding_time else 'DAM-QA'}
|
| 141 |
+
"""
|
| 142 |
+
|
| 143 |
+
# Create voting visualization
|
| 144 |
+
vote_fig = create_voting_chart(voting_details)
|
| 145 |
+
|
| 146 |
+
# Detailed voting info
|
| 147 |
+
voting_info = format_voting_details(voting_details)
|
| 148 |
+
|
| 149 |
+
return comparison, original_result, sliding_result, vote_fig, voting_info
|
| 150 |
+
|
| 151 |
+
except Exception as e:
|
| 152 |
+
error_msg = f"❌ Error during inference: {str(e)}"
|
| 153 |
+
return error_msg, "", "", None, ""
|
| 154 |
+
|
| 155 |
+
def create_voting_chart(voting_details):
|
| 156 |
+
"""Create a visualization of the voting process."""
|
| 157 |
+
if not voting_details or "vote_summary" not in voting_details:
|
| 158 |
+
return None
|
| 159 |
+
|
| 160 |
+
votes = voting_details["vote_summary"]
|
| 161 |
+
if not votes:
|
| 162 |
+
return None
|
| 163 |
+
|
| 164 |
+
answers = list(votes.keys())
|
| 165 |
+
weights = list(votes.values())
|
| 166 |
+
|
| 167 |
+
# Create bar chart
|
| 168 |
+
fig = go.Figure(data=[
|
| 169 |
+
go.Bar(
|
| 170 |
+
x=answers,
|
| 171 |
+
y=weights,
|
| 172 |
+
text=[f"{w:.3f}" for w in weights],
|
| 173 |
+
textposition='auto',
|
| 174 |
+
marker_color=['#C4314B' if ans == voting_details.get('final_answer', '') else '#0F6CBD' for ans in answers]
|
| 175 |
+
)
|
| 176 |
+
])
|
| 177 |
+
|
| 178 |
+
fig.update_layout(
|
| 179 |
+
title="DAM-QA Voting Results",
|
| 180 |
+
xaxis_title="Answers",
|
| 181 |
+
yaxis_title="Vote Weight",
|
| 182 |
+
plot_bgcolor="white",
|
| 183 |
+
paper_bgcolor="white",
|
| 184 |
+
font=dict(color="black", size=12),
|
| 185 |
+
height=400,
|
| 186 |
+
margin=dict(l=30, r=20, t=60, b=40)
|
| 187 |
+
)
|
| 188 |
+
|
| 189 |
+
return fig
|
| 190 |
+
|
| 191 |
+
def format_voting_details(voting_details):
|
| 192 |
+
"""Format detailed voting information."""
|
| 193 |
+
if not voting_details:
|
| 194 |
+
return "No voting details available."
|
| 195 |
+
|
| 196 |
+
details = []
|
| 197 |
+
|
| 198 |
+
# Full image vote
|
| 199 |
+
if "full_image" in voting_details and voting_details["full_image"]:
|
| 200 |
+
full_vote = voting_details["full_image"]
|
| 201 |
+
details.append(f"**Full Image Vote:**")
|
| 202 |
+
details.append(f"- Answer: {full_vote['answer']}")
|
| 203 |
+
details.append(f"- Weight: {full_vote['weight']:.3f}")
|
| 204 |
+
details.append("")
|
| 205 |
+
|
| 206 |
+
# Window votes summary
|
| 207 |
+
if "windows" in voting_details:
|
| 208 |
+
windows = voting_details["windows"]
|
| 209 |
+
details.append(f"**Window Votes:** {len(windows)} windows processed")
|
| 210 |
+
|
| 211 |
+
# Group by answer
|
| 212 |
+
answer_groups = {}
|
| 213 |
+
for window in windows:
|
| 214 |
+
ans = window["answer"]
|
| 215 |
+
if ans not in answer_groups:
|
| 216 |
+
answer_groups[ans] = []
|
| 217 |
+
answer_groups[ans].append(window)
|
| 218 |
+
|
| 219 |
+
for answer, windows_for_ans in answer_groups.items():
|
| 220 |
+
total_weight = sum(w["weight"] for w in windows_for_ans)
|
| 221 |
+
details.append(f"- **{answer}**: {len(windows_for_ans)} windows, total weight: {total_weight:.3f}")
|
| 222 |
+
details.append("")
|
| 223 |
+
|
| 224 |
+
# Final summary
|
| 225 |
+
if "vote_summary" in voting_details:
|
| 226 |
+
details.append("**Final Vote Tally:**")
|
| 227 |
+
for answer, weight in voting_details["vote_summary"].items():
|
| 228 |
+
marker = "🏆" if answer == voting_details.get("final_answer", "") else " "
|
| 229 |
+
details.append(f"{marker} {answer}: {weight:.3f}")
|
| 230 |
+
|
| 231 |
+
return "\n".join(details)
|
| 232 |
+
|
| 233 |
+
# Force light theme
|
| 234 |
+
force_light_theme_js = """
|
| 235 |
+
() => {
|
| 236 |
+
const params = new URLSearchParams(window.location.search);
|
| 237 |
+
if (!params.has('__theme')) {
|
| 238 |
+
params.set('__theme', 'light');
|
| 239 |
+
window.location.search = params.toString();
|
| 240 |
+
}
|
| 241 |
+
}
|
| 242 |
+
"""
|
| 243 |
+
|
| 244 |
+
# Main Gradio interface
|
| 245 |
+
with gr.Blocks(theme="gstaff/sketch", css=vlai_template.custom_css, fill_width=True, js=force_light_theme_js) as demo:
|
| 246 |
+
vlai_template.create_header()
|
| 247 |
+
|
| 248 |
+
gr.HTML(vlai_template.render_info_card(
|
| 249 |
+
icon="🤖",
|
| 250 |
+
title="About this Demo",
|
| 251 |
+
description="This demo compares two approaches for Visual Question Answering: DAM (original) processes the full image, while DAM-QA uses a sliding window approach with weighted voting to better handle text-rich images."
|
| 252 |
+
))
|
| 253 |
+
|
| 254 |
+
gr.HTML(vlai_template.render_disclaimer(
|
| 255 |
+
text=(
|
| 256 |
+
"This demo is for research and educational purposes only. "
|
| 257 |
+
"The models are designed for visual question answering on text-rich images. "
|
| 258 |
+
"Results may vary based on image quality and question complexity."
|
| 259 |
+
)
|
| 260 |
+
))
|
| 261 |
+
|
| 262 |
+
gr.Markdown("### 🎯 **How to Use**: Select a sample or upload your image → Ask a question → Compare both models → Analyze the voting results!")
|
| 263 |
+
|
| 264 |
+
# Model Status at top
|
| 265 |
+
with gr.Accordion("🤖 Model Status", open=True):
|
| 266 |
+
with gr.Row():
|
| 267 |
+
status_display = gr.Markdown("Loading models...")
|
| 268 |
+
refresh_btn = gr.Button("🔄 Refresh Status", variant="secondary", scale=1)
|
| 269 |
+
|
| 270 |
+
with gr.Row(equal_height=False, variant="panel"):
|
| 271 |
+
# LEFT: Input Section
|
| 272 |
+
with gr.Column(scale=35):
|
| 273 |
+
with gr.Accordion("📤 Upload Image & Question", open=True):
|
| 274 |
+
image_input = gr.Image(label="Upload Image", type="pil", height=300)
|
| 275 |
+
question_input = gr.Textbox(
|
| 276 |
+
label="Your Question",
|
| 277 |
+
placeholder="Ask a question about the image...",
|
| 278 |
+
lines=3
|
| 279 |
+
)
|
| 280 |
+
with gr.Row():
|
| 281 |
+
max_tokens_slider = gr.Slider(
|
| 282 |
+
minimum=10, maximum=200, value=100, step=10,
|
| 283 |
+
label="Max Tokens", scale=2
|
| 284 |
+
)
|
| 285 |
+
compare_btn = gr.Button("🔍 Compare Models", variant="primary", size="lg", scale=1)
|
| 286 |
+
|
| 287 |
+
with gr.Accordion("📋 Try Sample Images", open=True):
|
| 288 |
+
sample_dropdown = gr.Dropdown(
|
| 289 |
+
label="Select Sample Dataset",
|
| 290 |
+
choices=[],
|
| 291 |
+
value=None,
|
| 292 |
+
info="Choose a sample to auto-fill the inputs above"
|
| 293 |
+
)
|
| 294 |
+
sample_image_display = gr.Image(label="Sample Preview", interactive=False, height=200)
|
| 295 |
+
with gr.Row():
|
| 296 |
+
ground_truth_display = gr.Textbox(label="Expected Answer", interactive=False, scale=2)
|
| 297 |
+
sample_info_display = gr.Textbox(label="Dataset Info", interactive=False, lines=3, scale=1)
|
| 298 |
+
|
| 299 |
+
# MIDDLE: Results Comparison
|
| 300 |
+
with gr.Column(scale=40):
|
| 301 |
+
with gr.Accordion("📊 Model Comparison Results", open=True):
|
| 302 |
+
comparison_output = gr.Markdown("Click 'Compare Models' to see results...")
|
| 303 |
+
|
| 304 |
+
with gr.Row():
|
| 305 |
+
with gr.Column():
|
| 306 |
+
gr.Markdown("#### 🔍 DAM Original")
|
| 307 |
+
original_output = gr.Markdown("Results will appear here...")
|
| 308 |
+
with gr.Column():
|
| 309 |
+
gr.Markdown("#### 🧩 DAM-QA Sliding Window")
|
| 310 |
+
sliding_output = gr.Markdown("Results will appear here...")
|
| 311 |
+
|
| 312 |
+
# RIGHT: Voting Analysis
|
| 313 |
+
with gr.Column(scale=25):
|
| 314 |
+
with gr.Accordion("🗳️ DAM-QA Voting Analysis", open=True):
|
| 315 |
+
voting_chart = gr.Plot(label="Vote Weights")
|
| 316 |
+
voting_details = gr.Markdown("Voting details will appear here...", max_height=200)
|
| 317 |
+
|
| 318 |
+
gr.Markdown("""
|
| 319 |
+
## 📋 **Key Differences**
|
| 320 |
+
|
| 321 |
+
- **DAM Original**: Processes the entire image at once, faster but may miss fine details
|
| 322 |
+
- **DAM-QA Sliding Window**: Divides image into overlapping windows, slower but better for text-rich images
|
| 323 |
+
- **Voting Mechanism**: DAM-QA aggregates predictions from multiple windows using weighted voting
|
| 324 |
+
- **Use Cases**: DAM-QA typically performs better on documents, charts, and infographics
|
| 325 |
+
""")
|
| 326 |
+
|
| 327 |
+
vlai_template.create_footer()
|
| 328 |
+
|
| 329 |
+
# Event handlers
|
| 330 |
+
def on_load():
|
| 331 |
+
# Load samples first
|
| 332 |
+
samples = load_samples()
|
| 333 |
+
choices = [(f"{s['dataset']}: {s['question'][:50]}...", i) for i, s in enumerate(samples)]
|
| 334 |
+
|
| 335 |
+
# Load models immediately (this will take time but ensures they're ready)
|
| 336 |
+
print("Loading DAM models...")
|
| 337 |
+
status = init_models()
|
| 338 |
+
print(f"Model initialization complete: {status}")
|
| 339 |
+
|
| 340 |
+
return status, gr.Dropdown(choices=choices, value=0 if choices else None)
|
| 341 |
+
|
| 342 |
+
def refresh_status():
|
| 343 |
+
"""Check current model status."""
|
| 344 |
+
if STATE["dam_original"] is not None and STATE["dam_sliding"] is not None:
|
| 345 |
+
return "✅ Both DAM models loaded successfully!"
|
| 346 |
+
else:
|
| 347 |
+
return "🔄 Models not loaded. Click to retry."
|
| 348 |
+
|
| 349 |
+
def retry_loading():
|
| 350 |
+
"""Retry loading models."""
|
| 351 |
+
return init_models()
|
| 352 |
+
|
| 353 |
+
demo.load(
|
| 354 |
+
fn=on_load,
|
| 355 |
+
outputs=[status_display, sample_dropdown]
|
| 356 |
+
)
|
| 357 |
+
|
| 358 |
+
# Add refresh button functionality
|
| 359 |
+
refresh_btn.click(
|
| 360 |
+
fn=refresh_status,
|
| 361 |
+
outputs=[status_display]
|
| 362 |
+
)
|
| 363 |
+
|
| 364 |
+
sample_dropdown.change(
|
| 365 |
+
fn=fill_from_sample,
|
| 366 |
+
inputs=[sample_dropdown],
|
| 367 |
+
outputs=[sample_image_display, ground_truth_display, sample_info_display, image_input, question_input]
|
| 368 |
+
)
|
| 369 |
+
|
| 370 |
+
compare_btn.click(
|
| 371 |
+
fn=compare_models,
|
| 372 |
+
inputs=[image_input, question_input, max_tokens_slider],
|
| 373 |
+
outputs=[comparison_output, original_output, sliding_output, voting_chart, voting_details]
|
| 374 |
+
)
|
| 375 |
+
|
| 376 |
+
if __name__ == "__main__":
|
| 377 |
+
demo.launch(
|
| 378 |
+
share=False,
|
| 379 |
+
show_error=True,
|
| 380 |
+
allowed_paths=["sample_images", "static"]
|
| 381 |
+
)
|
packages.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
graphviz
|
| 2 |
+
fonts-liberation
|
requirements.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==5.38.0
|
| 2 |
+
pandas>=1.5.0
|
| 3 |
+
numpy>=1.24.0
|
| 4 |
+
plotly>=5.15.0
|
| 5 |
+
torch>=2.0.0
|
| 6 |
+
transformers>=4.30.0
|
| 7 |
+
pillow>=10.0.0
|
| 8 |
+
accelerate>=0.20.0
|
| 9 |
+
opencv-python
|
| 10 |
+
sentencepiece
|
sample_images/chartqa_sample1.jpeg
ADDED
|
Git LFS Details
|
sample_images/docvqa_sample1.png
ADDED
|
Git LFS Details
|
sample_images/docvqa_sample2.png
ADDED
|
Git LFS Details
|
sample_images/infovqa_sample1.jpeg
ADDED
|
Git LFS Details
|
sample_images/textvqa_sample1.jpg
ADDED
|
Git LFS Details
|
sample_images/vqav2_sample1.png
ADDED
|
Git LFS Details
|
samples.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"dataset": "DocVQA",
|
| 4 |
+
"image": "sample_images/docvqa_sample1.png",
|
| 5 |
+
"question": "What is the 'actual' value per 1000, during the year 1975?",
|
| 6 |
+
"ground_truth": "0.28",
|
| 7 |
+
"description": "Document question answering about statistical data"
|
| 8 |
+
},
|
| 9 |
+
{
|
| 10 |
+
"dataset": "DocVQA",
|
| 11 |
+
"image": "sample_images/docvqa_sample2.png",
|
| 12 |
+
"question": "What is name of university?",
|
| 13 |
+
"ground_truth": "University of California",
|
| 14 |
+
"description": "Document question answering about institutional information"
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"dataset": "InfographicVQA",
|
| 18 |
+
"image": "sample_images/infovqa_sample1.jpeg",
|
| 19 |
+
"question": "Which social platform has heavy female audience?",
|
| 20 |
+
"ground_truth": "Pinterest",
|
| 21 |
+
"description": "Infographic question answering about social media demographics"
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"dataset": "ChartQA",
|
| 25 |
+
"image": "sample_images/chartqa_sample1.jpeg",
|
| 26 |
+
"question": "What is the highest value in the chart?",
|
| 27 |
+
"ground_truth": "Unknown (sample chart)",
|
| 28 |
+
"description": "Chart question answering about data visualization"
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"dataset": "TextVQA",
|
| 32 |
+
"image": "sample_images/textvqa_sample1.jpg",
|
| 33 |
+
"question": "What text is visible in the image?",
|
| 34 |
+
"ground_truth": "Various text (sample image)",
|
| 35 |
+
"description": "Text-based visual question answering"
|
| 36 |
+
},
|
| 37 |
+
{
|
| 38 |
+
"dataset": "VQAv2",
|
| 39 |
+
"image": "sample_images/vqav2_sample1.png",
|
| 40 |
+
"question": "What is in the image?",
|
| 41 |
+
"ground_truth": "Various objects (sample image)",
|
| 42 |
+
"description": "General visual question answering"
|
| 43 |
+
}
|
| 44 |
+
]
|
src/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# DAM Demo Package
|
src/dam_models.py
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
DAM Model Classes for Demo
|
| 3 |
+
Simplified versions of DAM inference for Hugging Face Space deployment
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import torch
|
| 8 |
+
import time
|
| 9 |
+
from PIL import Image
|
| 10 |
+
from collections import defaultdict
|
| 11 |
+
from typing import Dict, Tuple, Optional
|
| 12 |
+
from transformers import AutoModel
|
| 13 |
+
|
| 14 |
+
# Simplified utility functions
|
| 15 |
+
def resize_keep_aspect(img: Image.Image, max_size: int = 1024) -> Image.Image:
|
| 16 |
+
"""Resize image while keeping aspect ratio."""
|
| 17 |
+
W, H = img.size
|
| 18 |
+
if max(W, H) <= max_size:
|
| 19 |
+
return img
|
| 20 |
+
|
| 21 |
+
if W > H:
|
| 22 |
+
new_W, new_H = max_size, int(H * max_size / W)
|
| 23 |
+
else:
|
| 24 |
+
new_W, new_H = int(W * max_size / H), max_size
|
| 25 |
+
|
| 26 |
+
return img.resize((new_W, new_H), Image.LANCZOS)
|
| 27 |
+
|
| 28 |
+
def create_full_image_mask(width: int, height: int) -> Image.Image:
|
| 29 |
+
"""Create a full white mask for the entire image."""
|
| 30 |
+
return Image.new("L", (width, height), 255)
|
| 31 |
+
|
| 32 |
+
def get_windows(width: int, height: int, window_size: int, stride: int):
|
| 33 |
+
"""Generate sliding window coordinates."""
|
| 34 |
+
windows = []
|
| 35 |
+
for y in range(0, height - window_size + 1, stride):
|
| 36 |
+
for x in range(0, width - window_size + 1, stride):
|
| 37 |
+
windows.append((x, y, min(x + window_size, width), min(y + window_size, height)))
|
| 38 |
+
|
| 39 |
+
# Add remaining edge windows
|
| 40 |
+
if width % stride != 0:
|
| 41 |
+
for y in range(0, height - window_size + 1, stride):
|
| 42 |
+
windows.append((width - window_size, y, width, min(y + window_size, height)))
|
| 43 |
+
|
| 44 |
+
if height % stride != 0:
|
| 45 |
+
for x in range(0, width - window_size + 1, stride):
|
| 46 |
+
windows.append((x, height - window_size, min(x + window_size, width), height))
|
| 47 |
+
|
| 48 |
+
return windows
|
| 49 |
+
|
| 50 |
+
def aggregate_votes(votes: Dict[str, float]) -> str:
|
| 51 |
+
"""Aggregate votes and return the answer with highest weight."""
|
| 52 |
+
if not votes:
|
| 53 |
+
return ""
|
| 54 |
+
return max(votes.items(), key=lambda x: x[1])[0]
|
| 55 |
+
|
| 56 |
+
class DAMOriginal:
    """Baseline DAM VQA: answers a question from the whole image.

    Wraps NVIDIA's self-contained DAM-3B checkpoint; the full image is
    submitted together with an all-white mask so the model attends to
    every pixel when generating an answer.
    """

    def __init__(self, device: str = "auto"):
        """Load DAM-3B onto the target device ("auto" picks CUDA when available)."""
        target = ("cuda" if torch.cuda.is_available() else "cpu") if device == "auto" else device
        self.device = torch.device(target)

        print(f"Loading DAM model on {self.device}...")
        self.dam_model = AutoModel.from_pretrained(
            "nvidia/DAM-3B-Self-Contained",
            trust_remote_code=True,
        ).to(self.device)

        self.dam = self.dam_model.init_dam(conv_mode="v1", prompt_mode="full+focal_crop")
        print("DAM Original model loaded successfully!")

    def predict(self, img: Image.Image, question: str, max_new_tokens: int = 100) -> Tuple[str, float]:
        """Answer `question` using the full image.

        Args:
            img: Input image; resized so its longest side is at most 1024.
            question: Free-form question about the image.
            max_new_tokens: Generation budget for the answer.

        Returns:
            Tuple of (answer, inference_time in seconds). On failure the
            answer is an "Error: ..." string rather than a raised exception.
        """
        # Normalize resolution before inference.
        img = resize_keep_aspect(img, 1024)
        width, height = img.size

        # An all-white mask tells DAM to attend to the entire image.
        mask = create_full_image_mask(width, height)

        header = (
            "<image>\n"
            "Answer each question concisely in a single word or short phrase, "
            "without any lengthy descriptions or explanations.\n"
            "Rely only on information that is clearly visible in the provided image.\n"
            "If the answer cannot be determined from the image, respond with \"unanswerable\".\n"
        )
        prompt = header + f"Question: {question}\nAnswer:"

        # Near-zero temperature keeps decoding effectively deterministic.
        generation_kwargs = {
            "streaming": False,
            "temperature": 1e-7,
            "top_p": 0.5,
            "num_beams": 1,
            "max_new_tokens": max_new_tokens,
        }

        start = time.time()
        try:
            raw = self.dam.get_description(img, mask, prompt, **generation_kwargs)
            elapsed = time.time() - start

            # get_description may return a plain string or an iterable of tokens.
            text = raw if isinstance(raw, str) else "".join(raw)
            return text.strip(), elapsed

        except Exception as e:
            elapsed = time.time() - start
            print(f"Error in DAM Original prediction: {e}")
            return f"Error: {str(e)}", elapsed
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
class DAMSlidingWindow:
    """DAM VQA with a sliding-window voting strategy.

    The question is asked once on the full image and once per window crop;
    each non-empty answer casts a vote weighted by the fraction of the image
    area it saw, and the highest-weighted answer is returned.
    """

    # Instruction header shared by the full-image query and every crop query.
    _INSTRUCTIONS = (
        "<image>\n"
        "Answer each question concisely in a single word or short phrase, "
        "without any lengthy descriptions or explanations.\n"
        "Rely only on information that is clearly visible in the provided image.\n"
        "If the answer cannot be determined from the image, respond with \"unanswerable\".\n"
    )

    def __init__(self, device: str = "auto", window_size: int = 512, stride: int = 256):
        """Load DAM-3B and remember the sliding-window geometry."""
        if device == "auto":
            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        else:
            self.device = torch.device(device)

        self.window_size = window_size
        self.stride = stride

        print(f"Loading DAM model on {self.device}...")
        self.dam_model = AutoModel.from_pretrained(
            "nvidia/DAM-3B-Self-Contained",
            trust_remote_code=True,
        ).to(self.device)

        self.dam = self.dam_model.init_dam(conv_mode="v1", prompt_mode="full+focal_crop")
        print(f"DAM Sliding Window model loaded successfully! (window_size={window_size}, stride={stride})")

    @staticmethod
    def _to_text(result) -> str:
        """Normalize get_description output (a string or token iterable) to stripped text."""
        return (result if isinstance(result, str) else "".join(result)).strip()

    def predict(self, img: Image.Image, question: str, max_new_tokens: int = 100,
                unanswerable_weight: float = 1.0) -> Tuple[str, float, Dict]:
        """Answer `question` via area-weighted voting over the full image and crops.

        Args:
            img: Input image; resized so its longest side is at most 1024.
            question: Free-form question about the image.
            max_new_tokens: Generation budget per query.
            unanswerable_weight: Multiplier applied to "unanswerable" votes,
                letting callers down-weight (or boost) that response.

        Returns:
            Tuple of (answer, inference_time, voting_details). On failure the
            answer is an "Error: ..." string and the details hold the error.
        """
        img = resize_keep_aspect(img, 1024)
        W, H = img.size

        prompt = self._INSTRUCTIONS + f"Question: {question}\nAnswer:"

        # Near-zero temperature keeps decoding effectively deterministic.
        params = {
            "streaming": False,
            "temperature": 1e-7,
            "top_p": 0.5,
            "num_beams": 1,
            "max_new_tokens": max_new_tokens,
        }

        start_time = time.time()
        votes = defaultdict(float)
        voting_details = {"full_image": None, "windows": []}

        try:
            # Full-image vote: it saw everything, so its base weight is 1.0.
            ans_full = self._to_text(
                self.dam.get_description(img, create_full_image_mask(W, H), prompt, **params)
            )
            if ans_full:
                weight = unanswerable_weight if ans_full.lower() == "unanswerable" else 1.0
                votes[ans_full] += weight
                voting_details["full_image"] = {"answer": ans_full, "weight": weight}

            # One vote per window, weighted by the window's share of image area.
            windows = get_windows(W, H, self.window_size, self.stride)
            for i, (x0, y0, x1, y1) in enumerate(windows):
                crop = img.crop((x0, y0, x1, y1))
                mask_crop = Image.new("L", (x1 - x0, y1 - y0), 255)

                ans = self._to_text(self.dam.get_description(crop, mask_crop, prompt, **params))

                # Weight is always computed so the per-window record below never
                # refers to a stale value from a previous iteration.
                weight = ((x1 - x0) * (y1 - y0)) / (W * H)
                if ans.lower() == "unanswerable":
                    weight *= unanswerable_weight
                if ans:
                    votes[ans] += weight

                voting_details["windows"].append({
                    "window_id": i,
                    "coords": (x0, y0, x1, y1),
                    "answer": ans,
                    "weight": weight,
                })

            # Fall back to the full-image answer (then a placeholder) when voting
            # produced nothing. Replaces the old `'ans_full' in locals()` check,
            # which was always true on this code path.
            prediction = aggregate_votes(votes) or ans_full or "No answer"

            inference_time = time.time() - start_time

            voting_details["vote_summary"] = dict(votes)
            voting_details["final_answer"] = prediction
            voting_details["total_windows"] = len(windows)

            return prediction, inference_time, voting_details

        except Exception as e:
            inference_time = time.time() - start_time
            print(f"Error in DAM Sliding Window prediction: {e}")
            return f"Error: {str(e)}", inference_time, {"error": str(e)}
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
# Module-level singletons; the models are expensive to load, so they are
# constructed lazily on first request and then reused.
_dam_original = None
_dam_sliding = None


def get_dam_original(device: str = "auto"):
    """Return the shared DAMOriginal instance, constructing it on first call."""
    global _dam_original
    if _dam_original is not None:
        return _dam_original
    _dam_original = DAMOriginal(device)
    return _dam_original


def get_dam_sliding(device: str = "auto", window_size: int = 512, stride: int = 256):
    """Return the shared DAMSlidingWindow instance, constructing it on first call.

    Note: window_size and stride only take effect when the singleton is first
    built; subsequent calls return the existing instance unchanged.
    """
    global _dam_sliding
    if _dam_sliding is not None:
        return _dam_sliding
    _dam_sliding = DAMSlidingWindow(device, window_size, stride)
    return _dam_sliding
|
static/aivn_logo.png
ADDED
|
Git LFS Details
|
static/vlai_logo.png
ADDED
|
Git LFS Details
|
vlai_template.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, base64
|
| 2 |
+
import gradio as gr
|
| 3 |
+
|
| 4 |
+
# Theming (can be overridden by the host app)
|
| 5 |
+
PRIMARY_COLOR = "#0F6CBD" # medical calm blue
|
| 6 |
+
ACCENT_COLOR = "#C4314B" # medical alert red
|
| 7 |
+
SUCCESS_COLOR = "#2E7D32" # positive/ok
|
| 8 |
+
BG1 = "#F0F7FF"
|
| 9 |
+
BG2 = "#E8F0FA"
|
| 10 |
+
BG3 = "#DDE7F8"
|
| 11 |
+
FONT_FAMILY = "'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, 'Noto Sans', 'Liberation Sans', sans-serif"
|
| 12 |
+
|
| 13 |
+
PROJECT_DESCRIPTION = ""
|
| 14 |
+
META_INFO = [] # list of (label, value)
|
| 15 |
+
|
| 16 |
+
def set_colors(primary: str = None, accent: str = None, bg1: str = None, bg2: str = None, bg3: str = None):
    """Allow host app to set theme colors dynamically.

    Only truthy arguments are applied; afterwards the module-level CSS is
    rebuilt so the new palette takes effect.
    """
    global custom_css
    overrides = {
        "PRIMARY_COLOR": primary,
        "ACCENT_COLOR": accent,
        "BG1": bg1,
        "BG2": bg2,
        "BG3": bg3,
    }
    for name, value in overrides.items():
        if value:
            globals()[name] = value
    # Rebuild CSS so it reflects the updated palette.
    custom_css = _build_custom_css()
|
| 31 |
+
|
| 32 |
+
def set_font(font_family: str):
    """Allow host app to set a custom font stack (e.g. 'Inter' with fallbacks).

    Empty or non-string values are ignored; on success the module-level CSS
    is rebuilt so the new font applies everywhere.
    """
    global FONT_FAMILY, custom_css
    if not font_family or not isinstance(font_family, str):
        return
    FONT_FAMILY = font_family
    custom_css = _build_custom_css()
|
| 38 |
+
|
| 39 |
+
def set_meta(project_name: str = None, year: str = None, module: str = None, description: str = None, meta_items: list = None):
    """Set project metadata used across the header and info sections.

    Args:
        project_name: accepted for API compatibility; currently unused here.
        year: accepted for API compatibility; currently unused here.
        module: accepted for API compatibility; currently unused here.
        description: replaces the module-level PROJECT_DESCRIPTION when not None.
        meta_items: list of (label, value) pairs; replaces META_INFO when not None.

    NOTE(review): project_name/year/module are ignored by this implementation —
    confirm whether they were meant to feed the header before relying on them.
    """
    global PROJECT_DESCRIPTION, META_INFO
    if description is not None:
        PROJECT_DESCRIPTION = description
    if meta_items is not None:
        META_INFO = meta_items
|
| 46 |
+
|
| 47 |
+
def configure(project_name: str = None, year: str = None, module: str = None, description: str = None,
              colors: dict = None, font_family: str = None, meta_items: list = None):
    """One-call configuration for meta, theme, and font.

    Applies the `colors` dict (keys: primary, accent, bg1, bg2, bg3) and
    `font_family` when provided, then forwards the metadata to set_meta().
    """
    if colors:
        palette = {key: colors.get(key) for key in ("primary", "accent", "bg1", "bg2", "bg3")}
        set_colors(**palette)
    if font_family:
        set_font(font_family)
    set_meta(project_name, year, module, description, meta_items)
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def image_to_base64(image_path: str):
    """Read a file (path relative to this module) and return it base64-encoded.

    Resolving against the module directory keeps bundled assets loadable
    regardless of the process working directory.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(module_dir, image_path), "rb") as handle:
        raw = handle.read()
    return base64.b64encode(raw).decode("utf-8")
|
| 69 |
+
|
| 70 |
+
def create_header():
    """Render the two-column page header: AIVN logo left, demo title right.

    Must be called inside a gr.Blocks() context; emits Gradio components as
    a side effect and returns nothing.
    """
    with gr.Row():
        with gr.Column(scale=2):
            # Inline the logo as base64 so the header has no external asset URL.
            logo_base64 = image_to_base64("static/aivn_logo.png")
            gr.HTML(
                f"""<img src="data:image/png;base64,{logo_base64}"
                alt="Logo"
                style="height:120px;width:auto;margin:0 auto;margin-bottom:16px; display:block;">"""
            )
        with gr.Column(scale=2):
            gr.HTML(f"""
            <div style="display:flex;justify-content:flex-start;align-items:center;gap:30px;">
                <div>
                    <h1 style="margin-bottom:0; color: {PRIMARY_COLOR}; font-size: 2.5em; font-weight: bold;">DAM-QA Demo </h1>
                    <h3 style="color: #888; font-style: italic">Describe Anything Model for Visual Question Answering on Text-rich Images</h3>
                </div>
            </div>
            """)
|
| 88 |
+
|
| 89 |
+
def create_footer():
    """Render the fixed bottom credit bar (VLAI logo + AI VIET NAM link).

    Returns the gr.HTML component so the caller can place it in a layout.
    """
    logo_base64_vlai = image_to_base64("static/vlai_logo.png")
    # Plain string for the <style> part (its CSS braces must not be treated as
    # f-string fields), concatenated with an f-string for the HTML that needs
    # the base64 logo interpolated.
    footer_html = """
    <style>
    .sticky-footer{position:fixed;bottom:0px;left:0;width:100%;background:#E8F5E8;
    padding:10px;box-shadow:0 -2px 10px rgba(0,0,0,0.1);z-index:1000;}
    .content-wrap{padding-bottom:60px;}
    </style>""" + f"""
    <div class="sticky-footer">
        <div style="text-align:center;font-size:18px; color: #888">
            Created by
            <a href="https://vlai.work" target="_blank" style="color:#465C88;text-decoration:none;font-weight:bold; display:inline-flex; align-items:center;"> VLAI
            <img src="data:image/png;base64,{logo_base64_vlai}" alt="Logo" style="height:20px; width:auto;">
            </a> from <a href="https://aivietnam.edu.vn/" target="_blank" style="color:#355724;text-decoration:none;font-weight:bold">AI VIET NAM</a>
        </div>
    </div>
    """
    return gr.HTML(footer_html)
|
| 107 |
+
|
| 108 |
+
def _build_custom_css() -> str:
    """Build the app-wide CSS from the current theme globals.

    Re-invoked by set_colors()/set_font() so the stylesheet always reflects
    the latest BG1-BG3 palette and FONT_FAMILY stack. Doubled braces escape
    literal CSS braces inside the f-string.
    """
    return f"""
    @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');

    .gradio-container {{
        min-height: 100vh !important;
        width: 100vw !important;
        margin: 0 !important;
        padding: 0px !important;
        background: linear-gradient(135deg, {BG1} 0%, {BG2} 50%, {BG3} 100%);
        background-size: 600% 600%;
        animation: gradientBG 7s ease infinite;
    }}

    /* Global font setup */
    body, .gradio-container, .gr-block, .gr-markdown, .gr-button, .gr-input,
    .gr-dropdown, .gr-number, .gr-plot, .gr-dataframe, .gr-accordion, .gr-form,
    .gr-textbox, .gr-html, table, th, td, label, h1, h2, h3, h4, h5, h6, p, span, div {{
        font-family: {FONT_FAMILY} !important;
    }}

    @keyframes gradientBG {{
        0% {{background-position: 0% 50%;}}
        50% {{background-position: 100% 50%;}}
        100% {{background-position: 0% 50%;}}
    }}

    /* Minimize spacing and padding */
    .content-wrap {{
        padding: 2px !important;
        margin: 0 !important;
    }}

    /* Reduce component spacing */
    .gr-row {{
        gap: 5px !important;
        margin: 2px 0 !important;
    }}

    .gr-column {{
        gap: 4px !important;
        padding: 4px !important;
    }}

    /* Accordion optimization */
    .gr-accordion {{
        margin: 4px 0 !important;
    }}

    .gr-accordion .gr-accordion-content {{
        padding: 2px !important;
    }}

    /* Form elements spacing */
    .gr-form {{
        gap: 2px !important;
    }}

    /* Button styling */
    .gr-button {{
        margin: 2px 0 !important;
    }}

    /* DataFrame optimization */
    .gr-dataframe {{
        margin: 4px 0 !important;
    }}

    /* Remove horizontal scroll from data preview */
    .gr-dataframe .wrap {{
        overflow-x: auto !important;
        max-width: 100% !important;
    }}

    /* Plot optimization */
    .gr-plot {{
        margin: 4px 0 !important;
    }}

    /* Reduce markdown margins */
    .gr-markdown {{
        margin: 2px 0 !important;
    }}

    /* Footer positioning */
    .sticky-footer {{
        position: fixed;
        bottom: 0px;
        left: 0;
        width: 100%;
        background: {BG1};
        padding: 6px !important;
        box-shadow: 0 -2px 10px rgba(0,0,0,0.1);
        z-index: 1000;
    }}
    """

# Initialize the module-level CSS once at import time using the default theme.
custom_css = _build_custom_css()
|
| 207 |
+
|
| 208 |
+
def render_info_card(description: str = None, meta_items: list = None, icon: str = "🧠", title: str = "About this demo") -> str:
    """Render a blue-accented "about" card as an HTML snippet.

    Args:
        description: body text; falls back to the module-level PROJECT_DESCRIPTION.
        meta_items: list of (label, value) pairs; falls back to META_INFO.
        icon: emoji shown on the left of the card.
        title: bold heading line.

    Returns:
        An HTML string suitable for gr.HTML().
    """
    desc = description if description is not None else PROJECT_DESCRIPTION
    items = meta_items if meta_items is not None else META_INFO
    # Join metadata as "label: value" chunks separated by a middle dot.
    meta_html = " · ".join([f"<span><strong>{k}</strong>: {v}</span>" for k, v in items]) if items else ""
    return f"""
    <div style="margin: 8px 0 8px 0;">
      <div style="background:#F5F9FF;border-left:6px solid {PRIMARY_COLOR};padding:14px 16px;border-radius:10px;box-shadow:0 1px 3px rgba(0,0,0,0.06);">
        <div style="display:flex;gap:14px;align-items:flex-start;">
          <div style="font-size:22px;">{icon}</div>
          <div>
            <div style="font-weight:700;color:{PRIMARY_COLOR};margin-bottom:4px;">{title}</div>
            <div style="color:#000;font-size:14px;line-height:1.5;">{desc}</div>
            <div style="margin-top:8px;color:#000;font-size:13px;">{meta_html}</div>
          </div>
        </div>
      </div>
    </div>
    """
|
| 226 |
+
|
| 227 |
+
def render_disclaimer(text: str, icon: str = "⚠️", title: str = "Educational Use Only") -> str:
    """Render a red-accented warning card as an HTML snippet.

    Args:
        text: body text of the warning.
        icon: emoji shown on the left of the card.
        title: bold heading line.

    Returns:
        An HTML string suitable for gr.HTML().
    """
    # Triple-quoted f-string: the inner double quotes need no escaping, unlike
    # the original's backslash-escaped variant (output is byte-identical).
    return f"""
    <div style="margin: 8px 0 6px 0;">
      <div style="background:#FFF4F4;border-left:6px solid {ACCENT_COLOR};padding:12px 16px;border-radius:8px;box-shadow:0 1px 3px rgba(0,0,0,0.06);">
        <div style="display:flex;gap:10px;align-items:flex-start;color:#000;">
          <span style="font-size:20px">{icon}</span>
          <div>
            <div style="font-weight:700; margin-bottom:4px;">{title}</div>
            <div style="font-size:14px; line-height:1.4;">{text}</div>
          </div>
        </div>
      </div>
    </div>
    """
|