Spaces:

tuandunghcmut
/

viscot-demo

Running on Zero

dung-vpt-uney committed on Oct 12

Commit

31a530c

1 Parent(s): f39b78a

Update Visual-CoT demo - 2025-10-12 23:34:21

Fixes:
- Fix LLaVA config registration error (compatibility with newer transformers)
- Update Gradio to latest version (security fixes)
- Auto-deployed via update script

Files changed (1) hide show

app.py +97 -61

app.py CHANGED Viewed

@@ -34,6 +34,21 @@ from llava.mm_utils import (
     get_model_name_from_path,
 )
 # =============================================================================
 # Authentication
 # =============================================================================
@@ -66,14 +81,16 @@ MODEL_PATH = "deepcs233/VisCoT-7b-224"  # Default: smallest/fastest
 CURRENT_MODEL_NAME = "VisCoT-7B-224 (Fastest)"
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-# Benchmark datasets from HF collection
-BENCHMARK_DATASETS = {
-    "GQA": "tuandunghcmut/gqa_cot",
-    "TextVQA": "tuandunghcmut/textvqa_cot",
-    "DocVQA": "tuandunghcmut/docvqa_cot",
-    "Flickr30K": "tuandunghcmut/flickr30k_cot",
-    "InfographicsVQA": "tuandunghcmut/infographicsvqa_cot",
-}
 # =============================================================================
 # Model Loading (Global - bfloat16)
@@ -156,36 +173,13 @@ def switch_model(model_choice):
 # =============================================================================
 def load_benchmark_example(dataset_name, index=0):
-    """Load an example from benchmark dataset"""
-    try:
-        from datasets import load_dataset
-        dataset_path = BENCHMARK_DATASETS.get(dataset_name)
-        if not dataset_path:
-            return None, "Dataset not found", "", "", ""
-        # Load dataset
-        dataset = load_dataset(dataset_path, split="train")
-        if index >= len(dataset):
-            index = 0
-        example = dataset[index]
-        # Extract fields
-        image = example.get("image")
-        question = example.get("question", "")
-        bbox = example.get("bbox", "")
-        answer = example.get("answer", "")
-        info = f"Dataset: {dataset_name} | Example {index + 1}/{len(dataset)}"
-        return image, question, bbox, answer, info
-    except Exception as e:
-        error_msg = f"Error loading benchmark: {str(e)}"
-        print(error_msg)
-        return None, error_msg, "", "", ""
 # =============================================================================
 # Utility Functions
@@ -610,16 +604,42 @@ def create_demo():
                             visible=False,
                         )
-                # Example images
-                gr.Markdown("### 📋 Try These Examples")
-                gr.Examples(
-                    examples=[
-                        ["examples/extreme_ironing.jpg", "What is unusual about this image?"],
-                        ["examples/waterview.jpg", "What are the things I should be cautious about when I visit here?"],
-                    ],
-                    inputs=[image_input, question_input],
-                    label="Click to load example",
-                )
                 # Event handlers
                 submit_btn.click(
@@ -698,19 +718,35 @@ def create_demo():
                             interactive=False,
                         )
-                gr.Markdown("""
-                ---
-                ### Dataset Information
-                1. **GQA** - Scene graph question answering with compositional reasoning
-                2. **TextVQA** - Questions requiring reading and understanding text in images
-                3. **DocVQA** - Document understanding and information extraction
-                4. **Visual7W** - Visual question answering with pointing and telling tasks
-                5. **Flickr30k** - Image captioning and visual grounding
-                **Note:** Examples are loaded directly from the [Visual-CoT Hugging Face Collection](https://huggingface.co/collections/tuandunghcmut/visual-chain-of-thought-reasoning-benchmarks-68e25b22c3c095c6f87baba0).
-                """)
                 # Event handlers
                 def load_and_update(dataset_name, index):

     get_model_name_from_path,
 )
+# Import benchmark loader for local datasets
+try:
+    from benchmark_loader import (
+        get_all_dataset_names,
+        load_benchmark_example_for_gradio,
+        get_random_examples_for_gradio,
+        get_dataset_info,
+        get_dataset_stats,
+    )
+    BENCHMARK_LOADER_AVAILABLE = True  # feature flag consulted wherever the loader functions are used
+    print("✅ Benchmark loader module imported successfully")
+except ImportError as e:  # only ImportError is handled; any other exception raised during the import propagates
+    BENCHMARK_LOADER_AVAILABLE = False  # the imported names above stay undefined in this case
+    print(f"⚠️ Benchmark loader not available: {e}")
 # =============================================================================
 # Authentication
 # =============================================================================
 CURRENT_MODEL_NAME = "VisCoT-7B-224 (Fastest)"
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# Benchmark datasets - will be loaded from benchmark_loader module
+if BENCHMARK_LOADER_AVAILABLE:
+    BENCHMARK_DATASETS = get_all_dataset_names()
+    print(f"✅ Loaded {len(BENCHMARK_DATASETS)} benchmark datasets")
+    stats = get_dataset_stats()  # runs at module import time, outside any try/except
+    total_examples = sum(s.get("total_examples", 0) for s in stats.values() if "error" not in s)  # entries carrying an "error" key are left out of the total
+    print(f"📊 Total examples across all benchmarks: {total_examples:,}")
+else:
+    BENCHMARK_DATASETS = ["GQA", "TextVQA", "DocVQA", "Visual7W", "Flickr30k"]  # hard-coded names used when the loader import failed
+    print("⚠️ Using fallback benchmark list")
 # =============================================================================
 # Model Loading (Global - bfloat16)
 # =============================================================================
 def load_benchmark_example(dataset_name, index=0):
+    """Load an example from benchmark dataset using benchmark_loader"""
+    if BENCHMARK_LOADER_AVAILABLE:
+        return load_benchmark_example_for_gradio(dataset_name, index)  # loader result is returned unchanged
+    else:
+        # Fallback for when benchmark_loader is not available
+        error_msg = "Benchmark loader module not available"
+        return None, error_msg, "", "", error_msg  # 5-tuple; the message fills both the 2nd and 5th slots
 # =============================================================================
 # Utility Functions
                             visible=False,
                         )
+                # Example images from benchmarks
+                gr.Markdown("### 📋 Try These Examples from Benchmarks")
+                # Generate examples from multiple benchmarks if available
+                if BENCHMARK_LOADER_AVAILABLE:
+                    try:
+                        benchmark_examples = get_random_examples_for_gradio(count=6)
+                        if benchmark_examples:
+                            gr.Examples(
+                                examples=benchmark_examples,
+                                inputs=[image_input, question_input],
+                                label="Click to load random benchmark examples",
+                            )
+                        else:
+                            gr.Markdown("*Benchmark examples loading failed. Check if images are available.*")
+                    except Exception as e:
+                        gr.Markdown(f"*Could not load benchmark examples: {e}*")
+                        # Fallback to default examples
+                        gr.Examples(
+                            examples=[
+                                ["examples/extreme_ironing.jpg", "What is unusual about this image?"],
+                                ["examples/waterview.jpg", "What are the things I should be cautious about when I visit here?"],
+                            ],
+                            inputs=[image_input, question_input],
+                            label="Click to load example",
+                        )
+                else:
+                    # Fallback examples when benchmark loader not available
+                    gr.Examples(
+                        examples=[
+                            ["examples/extreme_ironing.jpg", "What is unusual about this image?"],
+                            ["examples/waterview.jpg", "What are the things I should be cautious about when I visit here?"],
+                        ],
+                        inputs=[image_input, question_input],
+                        label="Click to load example",
+                    )
                 # Event handlers
                 submit_btn.click(
                             interactive=False,
                         )
+                # Dataset information - dynamically generated
+                if BENCHMARK_LOADER_AVAILABLE:
+                    dataset_info_md = "---\n\n### Available Benchmark Datasets\n\n"
+                    stats = get_dataset_stats()
+                    for i, (name, info) in enumerate(stats.items(), 1):
+                        if "error" not in info:
+                            dataset_info_md += f"{i}. **{name}** ({info['total_examples']:,} examples): {info['description']}\n"
+                        else:
+                            dataset_info_md += f"{i}. **{name}**: {info['error']}\n"
+                    total_examples = sum(s.get("total_examples", 0) for s in stats.values() if "error" not in s)
+                    dataset_info_md += f"\n**Total:** {total_examples:,} annotated examples across {len(stats)} benchmarks\n"
+                    dataset_info_md += "\n**Source:** Local JSONL files from Visual-CoT dataset"
+                    gr.Markdown(dataset_info_md)
+                else:
+                    gr.Markdown("""
+                    ---
+                    ### Dataset Information
+                    1. **GQA** - Scene graph question answering with compositional reasoning
+                    2. **TextVQA** - Questions requiring reading and understanding text in images
+                    3. **DocVQA** - Document understanding and information extraction
+                    4. **Visual7W** - Visual question answering with pointing and telling tasks
+                    5. **Flickr30k** - Image captioning and visual grounding
+                    **Note:** Benchmark loader module not available.
+                    """)
                 # Event handlers
                 def load_and_update(dataset_name, index):