Spaces: Running on Zero

dung-vpt-uney committed · ba64608
Parent(s): 3564f62

Update Visual-CoT demo - 2025-10-12 23:18:36

Fixes:
- Fix LLaVA config registration error (compatibility with newer transformers); a plausible guard is sketched below
- Update Gradio to latest version (security fixes)
- Auto-deployed via update script
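The registration fix itself is not shown in this diff. A minimal sketch of the likely guard, assuming the error is the duplicate-key ValueError that newer transformers versions raise when "llava" is already a built-in model type (the llava.* import path is the upstream LLaVA repo's; the guard is an assumption, not the commit's verbatim code):

    from transformers import AutoConfig, AutoModelForCausalLM
    from llava.model.language_model.llava_llama import LlavaConfig, LlavaLlamaForCausalLM

    try:
        # Newer transformers ship a built-in "llava" config; re-registering
        # the same key raises ValueError, so only register when absent.
        AutoConfig.register("llava", LlavaConfig)
        AutoModelForCausalLM.register(LlavaConfig, LlavaLlamaForCausalLM)
    except ValueError:
        pass  # already registered by the installed transformers version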
Files changed:
- app.py +130 -19
- requirements.txt +1 -0
app.py
CHANGED

@@ -54,21 +54,26 @@ else:
 # Configuration
 # =============================================================================
 
-
+# Available models
+AVAILABLE_MODELS = {
+    "VisCoT-7B-224 (Fastest)": "deepcs233/VisCoT-7b-224",
+    "VisCoT-7B-336 (Balanced)": "deepcs233/VisCoT-7b-336",
+    "VisCoT-13B-224 (Better)": "deepcs233/VisCoT-13b-224",
+    "VisCoT-13B-336 (Best)": "deepcs233/VisCoT-13b-336",
+}
+
+MODEL_PATH = "deepcs233/VisCoT-7b-224"  # Default: smallest/fastest
+CURRENT_MODEL_NAME = "VisCoT-7B-224 (Fastest)"
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
-# Benchmark datasets
-BENCHMARK_DATASETS = [
-    "…",
-    "…",
-    "…",
-    "…",
-    "…",
-
-    "textvqa",
-    "vsr",
-    "cub",
-]
+# Benchmark datasets from HF collection
+BENCHMARK_DATASETS = {
+    "GQA": "tuandunghcmut/gqa_cot",
+    "TextVQA": "tuandunghcmut/textvqa_cot",
+    "DocVQA": "tuandunghcmut/docvqa_cot",
+    "Flickr30K": "tuandunghcmut/flickr30k_cot",
+    "InfographicsVQA": "tuandunghcmut/infographicsvqa_cot",
+}
 
 # =============================================================================
 # Model Loading (Global - bfloat16)
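For reviewers, a quick sketch of how these dataset paths are consumed, assuming the *_cot datasets expose the image/question/bbox/answer columns that load_benchmark_example (next hunk) reads, and using the same "train" split it uses:

    from datasets import load_dataset

    # Peek at the first GQA example to confirm the expected columns exist.
    ds = load_dataset("tuandunghcmut/gqa_cot", split="train")
    ex = ds[0]
    print(ex.get("question"), ex.get("bbox"), ex.get("answer"))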
@@ -101,6 +106,87 @@ print(f"✓ Context length: {context_len}")
 print(f"✓ Device: {DEVICE}")
 
 
+# =============================================================================
+# Model Management Functions
+# =============================================================================
+
+def switch_model(model_choice):
+    """Switch to a different model"""
+    global tokenizer, model, image_processor, context_len, MODEL_PATH, CURRENT_MODEL_NAME
+
+    try:
+        new_model_path = AVAILABLE_MODELS[model_choice]
+
+        if new_model_path == MODEL_PATH:
+            return f"Already using {model_choice}"
+
+        print(f"\n🔄 Switching to {model_choice}...")
+        disable_torch_init()
+
+        model_name = get_model_name_from_path(new_model_path)
+
+        # Load new model
+        tokenizer, model, image_processor, context_len = load_pretrained_model(
+            new_model_path,
+            None,
+            model_name,
+            load_8bit=False,
+            load_4bit=False,
+            device=DEVICE,
+        )
+
+        # Ensure bfloat16
+        if DEVICE == "cuda":
+            model = model.to(dtype=torch.bfloat16)
+
+        MODEL_PATH = new_model_path
+        CURRENT_MODEL_NAME = model_choice
+
+        print(f"✓ Switched to {model_choice}")
+        return f"✓ Successfully switched to {model_choice}\nModel: {model_name}\nDevice: {DEVICE}"
+
+    except Exception as e:
+        import traceback
+        error_msg = f"❌ Failed to switch model: {str(e)}\n{traceback.format_exc()}"
+        print(error_msg)
+        return error_msg
+
+# =============================================================================
+# Benchmark Loading Functions
+# =============================================================================
+
+def load_benchmark_example(dataset_name, index=0):
+    """Load an example from benchmark dataset"""
+    try:
+        from datasets import load_dataset
+
+        dataset_path = BENCHMARK_DATASETS.get(dataset_name)
+        if not dataset_path:
+            return None, "Dataset not found", "", "", ""
+
+        # Load dataset
+        dataset = load_dataset(dataset_path, split="train")
+
+        if index >= len(dataset):
+            index = 0
+
+        example = dataset[index]
+
+        # Extract fields
+        image = example.get("image")
+        question = example.get("question", "")
+        bbox = example.get("bbox", "")
+        answer = example.get("answer", "")
+
+        info = f"Dataset: {dataset_name} | Example {index + 1}/{len(dataset)}"
+
+        return image, question, bbox, answer, info
+
+    except Exception as e:
+        error_msg = f"Error loading benchmark: {str(e)}"
+        print(error_msg)
+        return None, error_msg, "", "", ""
+
 # =============================================================================
 # Utility Functions
 # =============================================================================
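A hedged smoke test for the two new helpers, assuming it runs inside the Space where the model globals and the LLaVA utilities (disable_torch_init, load_pretrained_model, get_model_name_from_path) are already imported:

    # Minimal sanity check; switch_model reloads weights, so it is slow.
    image, question, bbox, answer, info = load_benchmark_example("GQA", index=0)
    print(info)      # e.g. "Dataset: GQA | Example 1/<N>"
    print(switch_model("VisCoT-7B-336 (Balanced)"))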
@@ -325,7 +411,7 @@ def create_demo():
 .header {
     text-align: center;
     padding: 20px;
-    background: linear-gradient(135deg, #…
+    background: linear-gradient(135deg, #475569 0%, #334155 100%);
     color: white;
     border-radius: 10px;
     margin-bottom: 20px;
@@ -357,8 +443,9 @@ def create_demo():
 
 with gr.Blocks(
     theme=gr.themes.Soft(
-        primary_hue="…",
-        secondary_hue="…",
+        primary_hue="slate",
+        secondary_hue="gray",
+        neutral_hue="slate",
     ),
     css=custom_css,
     title="Visual-CoT Demo"
@@ -405,13 +492,37 @@ def create_demo():
 gr.HTML("""
 <div class="info-box">
     <p style="margin: 0; font-size: 14px;">
-        …
-        …
-        <a href="https://huggingface.co/join" target="_blank">create a free account</a>…
+        <strong>Note:</strong> This Space uses Zero GPU which requires authentication.
+        Please <a href="https://huggingface.co/login" target="_blank">login</a> or
+        <a href="https://huggingface.co/join" target="_blank">create a free account</a> if you encounter quota errors.
     </p>
 </div>
 """)
 
+# Model Selector
+with gr.Row():
+    with gr.Column(scale=2):
+        gr.Markdown("### Model Selection")
+        model_dropdown = gr.Dropdown(
+            choices=list(AVAILABLE_MODELS.keys()),
+            value=CURRENT_MODEL_NAME,
+            label="Select Model",
+            info="Choose model variant (larger = better quality, slower)"
+        )
+    with gr.Column(scale=1):
+        gr.Markdown("### Current Model Status")
+        model_status = gr.Textbox(
+            value=f"Active: {CURRENT_MODEL_NAME}",
+            label="Status",
+            interactive=False
+        )
+
+model_dropdown.change(
+    fn=switch_model,
+    inputs=[model_dropdown],
+    outputs=[model_status]
+)
+
 with gr.Tabs():
     # ============================================================
     # Tab 1: Interactive Demo
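The dropdown-to-status wiring above is the standard Gradio event pattern; a self-contained sketch with a hypothetical stub in place of switch_model:

    import gradio as gr

    def fake_switch(choice):
        # Hypothetical stand-in for switch_model with the same signature.
        return f"✓ Successfully switched to {choice}"

    with gr.Blocks() as demo:
        dd = gr.Dropdown(choices=["A (fast)", "B (best)"], value="A (fast)", label="Select Model")
        status = gr.Textbox(value="Active: A (fast)", label="Status", interactive=False)
        # .change fires the callback whenever the dropdown value changes.
        dd.change(fn=fake_switch, inputs=[dd], outputs=[status])

    demo.launch()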
requirements.txt
CHANGED

@@ -12,6 +12,7 @@ sentencepiece==0.1.99
 gradio  # Latest version with all security updates
 spaces>=0.19.4
 huggingface_hub>=0.20.0  # For HF authentication and model downloads
+datasets>=2.14.0  # For loading benchmark datasets
 
 # Model dependencies
 accelerate==0.21.0