Spaces: Running on Zero

dung-vpt-uney committed · ba64608
Parent(s): 3564f62

Update Visual-CoT demo - 2025-10-12 23:18:36

Fixes:
- Fix LLaVA config registration error (compatibility with newer transformers); a plausible guard is sketched below
- Update Gradio to latest version (security fixes)
- Auto-deployed via update script
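The registration fix itself is not shown in this diff. A minimal sketch of the likely guard, assuming the error is the duplicate-key ValueError that newer transformers versions raise when "llava" is already a built-in model type (the llava.* import path is the upstream LLaVA repo's; the guard is an assumption, not the commit's verbatim code):

    from transformers import AutoConfig, AutoModelForCausalLM
    from llava.model.language_model.llava_llama import LlavaConfig, LlavaLlamaForCausalLM

    try:
        # Newer transformers ship a built-in "llava" config; re-registering
        # the same key raises ValueError, so only register when absent.
        AutoConfig.register("llava", LlavaConfig)
        AutoModelForCausalLM.register(LlavaConfig, LlavaLlamaForCausalLM)
    except ValueError:
        pass  # already registered by the installed transformers version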
Files changed:
- app.py +130 -19
- requirements.txt +1 -0
app.py
CHANGED

@@ -54,21 +54,26 @@ else:
 # Configuration
 # =============================================================================
 
-
+# Available models
+AVAILABLE_MODELS = {
+    "VisCoT-7B-224 (Fastest)": "deepcs233/VisCoT-7b-224",
+    "VisCoT-7B-336 (Balanced)": "deepcs233/VisCoT-7b-336",
+    "VisCoT-13B-224 (Better)": "deepcs233/VisCoT-13b-224",
+    "VisCoT-13B-336 (Best)": "deepcs233/VisCoT-13b-336",
+}
+
+MODEL_PATH = "deepcs233/VisCoT-7b-224"  # Default: smallest/fastest
+CURRENT_MODEL_NAME = "VisCoT-7B-224 (Fastest)"
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
-# Benchmark datasets
-BENCHMARK_DATASETS = [
-    "…",
-    "…",
-    "…",
-    "…",
-    "…",
-
-    "textvqa",
-    "vsr",
-    "cub",
-]
+# Benchmark datasets from HF collection
+BENCHMARK_DATASETS = {
+    "GQA": "tuandunghcmut/gqa_cot",
+    "TextVQA": "tuandunghcmut/textvqa_cot",
+    "DocVQA": "tuandunghcmut/docvqa_cot",
+    "Flickr30K": "tuandunghcmut/flickr30k_cot",
+    "InfographicsVQA": "tuandunghcmut/infographicsvqa_cot",
+}
 
 # =============================================================================
 # Model Loading (Global - bfloat16)
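For reviewers, a quick sketch of how these dataset paths are consumed, assuming the *_cot datasets expose the image/question/bbox/answer columns that load_benchmark_example (next hunk) reads, and using the same "train" split it uses:

    from datasets import load_dataset

    # Peek at the first GQA example to confirm the expected columns exist.
    ds = load_dataset("tuandunghcmut/gqa_cot", split="train")
    ex = ds[0]
    print(ex.get("question"), ex.get("bbox"), ex.get("answer"))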
@@ -101,6 +106,87 @@ print(f"✓ Context length: {context_len}")
 print(f"✓ Device: {DEVICE}")
 
 
+# =============================================================================
+# Model Management Functions
+# =============================================================================
+
+def switch_model(model_choice):
+    """Switch to a different model"""
+    global tokenizer, model, image_processor, context_len, MODEL_PATH, CURRENT_MODEL_NAME
+
+    try:
+        new_model_path = AVAILABLE_MODELS[model_choice]
+
+        if new_model_path == MODEL_PATH:
+            return f"Already using {model_choice}"
+
+        print(f"\n🔄 Switching to {model_choice}...")
+        disable_torch_init()
+
+        model_name = get_model_name_from_path(new_model_path)
+
+        # Load new model
+        tokenizer, model, image_processor, context_len = load_pretrained_model(
+            new_model_path,
+            None,
+            model_name,
+            load_8bit=False,
+            load_4bit=False,
+            device=DEVICE,
+        )
+
+        # Ensure bfloat16
+        if DEVICE == "cuda":
+            model = model.to(dtype=torch.bfloat16)
+
+        MODEL_PATH = new_model_path
+        CURRENT_MODEL_NAME = model_choice
+
+        print(f"✓ Switched to {model_choice}")
+        return f"✓ Successfully switched to {model_choice}\nModel: {model_name}\nDevice: {DEVICE}"
+
+    except Exception as e:
+        import traceback
+        error_msg = f"❌ Failed to switch model: {str(e)}\n{traceback.format_exc()}"
+        print(error_msg)
+        return error_msg
+
+# =============================================================================
+# Benchmark Loading Functions
+# =============================================================================
+
+def load_benchmark_example(dataset_name, index=0):
+    """Load an example from benchmark dataset"""
+    try:
+        from datasets import load_dataset
+
+        dataset_path = BENCHMARK_DATASETS.get(dataset_name)
+        if not dataset_path:
+            return None, "Dataset not found", "", "", ""
+
+        # Load dataset
+        dataset = load_dataset(dataset_path, split="train")
+
+        if index >= len(dataset):
+            index = 0
+
+        example = dataset[index]
+
+        # Extract fields
+        image = example.get("image")
+        question = example.get("question", "")
+        bbox = example.get("bbox", "")
+        answer = example.get("answer", "")
+
+        info = f"Dataset: {dataset_name} | Example {index + 1}/{len(dataset)}"
+
+        return image, question, bbox, answer, info
+
+    except Exception as e:
+        error_msg = f"Error loading benchmark: {str(e)}"
+        print(error_msg)
+        return None, error_msg, "", "", ""
+
 # =============================================================================
 # Utility Functions
 # =============================================================================
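A hedged smoke test for the two new helpers, assuming it runs inside the Space where the model globals and the LLaVA utilities (disable_torch_init, load_pretrained_model, get_model_name_from_path) are already imported:

    # Minimal sanity check; switch_model reloads weights, so it is slow.
    image, question, bbox, answer, info = load_benchmark_example("GQA", index=0)
    print(info)      # e.g. "Dataset: GQA | Example 1/<N>"
    print(switch_model("VisCoT-7B-336 (Balanced)"))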
@@ -325,7 +411,7 @@ def create_demo():
 .header {
     text-align: center;
     padding: 20px;
-    background: linear-gradient(135deg, #…
+    background: linear-gradient(135deg, #475569 0%, #334155 100%);
     color: white;
     border-radius: 10px;
     margin-bottom: 20px;
@@ -357,8 +443,9 @@ def create_demo():
 
 with gr.Blocks(
     theme=gr.themes.Soft(
-        primary_hue="…",
-        secondary_hue="…",
+        primary_hue="slate",
+        secondary_hue="gray",
+        neutral_hue="slate",
     ),
     css=custom_css,
     title="Visual-CoT Demo"
@@ -405,13 +492,37 @@ def create_demo():
 gr.HTML("""
 <div class="info-box">
     <p style="margin: 0; font-size: 14px;">
-        …
-        …
-        <a href="https://huggingface.co/join" target="_blank">create a free account</a>…
+        <strong>Note:</strong> This Space uses Zero GPU which requires authentication.
+        Please <a href="https://huggingface.co/login" target="_blank">login</a> or
+        <a href="https://huggingface.co/join" target="_blank">create a free account</a> if you encounter quota errors.
     </p>
 </div>
 """)
 
+# Model Selector
+with gr.Row():
+    with gr.Column(scale=2):
+        gr.Markdown("### Model Selection")
+        model_dropdown = gr.Dropdown(
+            choices=list(AVAILABLE_MODELS.keys()),
+            value=CURRENT_MODEL_NAME,
+            label="Select Model",
+            info="Choose model variant (larger = better quality, slower)"
+        )
+    with gr.Column(scale=1):
+        gr.Markdown("### Current Model Status")
+        model_status = gr.Textbox(
+            value=f"Active: {CURRENT_MODEL_NAME}",
+            label="Status",
+            interactive=False
+        )
+
+model_dropdown.change(
+    fn=switch_model,
+    inputs=[model_dropdown],
+    outputs=[model_status]
+)
+
 with gr.Tabs():
     # ============================================================
     # Tab 1: Interactive Demo
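The dropdown-to-status wiring above is the standard Gradio event pattern; a self-contained sketch with a hypothetical stub in place of switch_model:

    import gradio as gr

    def fake_switch(choice):
        # Hypothetical stand-in for switch_model with the same signature.
        return f"✓ Successfully switched to {choice}"

    with gr.Blocks() as demo:
        dd = gr.Dropdown(choices=["A (fast)", "B (best)"], value="A (fast)", label="Select Model")
        status = gr.Textbox(value="Active: A (fast)", label="Status", interactive=False)
        # .change fires the callback whenever the dropdown value changes.
        dd.change(fn=fake_switch, inputs=[dd], outputs=[status])

    demo.launch()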
requirements.txt
CHANGED

@@ -12,6 +12,7 @@ sentencepiece==0.1.99
 gradio  # Latest version with all security updates
 spaces>=0.19.4
 huggingface_hub>=0.20.0  # For HF authentication and model downloads
+datasets>=2.14.0  # For loading benchmark datasets
 
 # Model dependencies
 accelerate==0.21.0