dung-vpt-uney commited on
Commit
ba64608
·
1 Parent(s): 3564f62

Update Visual-CoT demo - 2025-10-12 23:18:36

Browse files

Fixes:
- Fix LLaVA config registration error (compatibility with newer transformers)
- Update Gradio to latest version (security fixes)
- Auto-deployed via update script

Files changed (2)
  1. app.py +130 -19
  2. requirements.txt +1 -0
app.py CHANGED
@@ -54,21 +54,26 @@ else:
54
  # Configuration
55
  # =============================================================================
56
 
57
- MODEL_PATH = "deepcs233/VisCoT-7b-224" # Hugging Face model ID (smallest version)
 
 
 
 
 
 
 
 
 
58
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
59
 
60
- # Benchmark datasets available
61
- BENCHMARK_DATASETS = [
62
- "docvqa",
63
- "flickr30k",
64
- "gqa",
65
- "infographicsvqa",
66
- "openimages",
67
- "textcap",
68
- "textvqa",
69
- "vsr",
70
- "cub",
71
- ]
72
 
73
  # =============================================================================
74
  # Model Loading (Global - bfloat16)
@@ -101,6 +106,87 @@ print(f"✓ Context length: {context_len}")
101
  print(f"✓ Device: {DEVICE}")
102
 
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  # =============================================================================
105
  # Utility Functions
106
  # =============================================================================
@@ -325,7 +411,7 @@ def create_demo():
325
  .header {
326
  text-align: center;
327
  padding: 20px;
328
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
329
  color: white;
330
  border-radius: 10px;
331
  margin-bottom: 20px;
@@ -357,8 +443,9 @@ def create_demo():
357
 
358
  with gr.Blocks(
359
  theme=gr.themes.Soft(
360
- primary_hue="indigo",
361
- secondary_hue="purple",
 
362
  ),
363
  css=custom_css,
364
  title="Visual-CoT Demo"
@@ -405,13 +492,37 @@ def create_demo():
405
  gr.HTML("""
406
  <div class="info-box">
407
  <p style="margin: 0; font-size: 14px;">
408
- 🔐 <strong>Authentication Required:</strong> This Space uses Zero GPU which requires you to be logged in to Hugging Face.
409
- If you see quota errors, please <a href="https://huggingface.co/login" target="_blank">login</a> or
410
- <a href="https://huggingface.co/join" target="_blank">create a free account</a>.
411
  </p>
412
  </div>
413
  """)
414
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
415
  with gr.Tabs():
416
  # ============================================================
417
  # Tab 1: Interactive Demo
 
54
  # Configuration
55
  # =============================================================================
56
 
57
+ # Available models
58
+ AVAILABLE_MODELS = {
59
+ "VisCoT-7B-224 (Fastest)": "deepcs233/VisCoT-7b-224",
60
+ "VisCoT-7B-336 (Balanced)": "deepcs233/VisCoT-7b-336",
61
+ "VisCoT-13B-224 (Better)": "deepcs233/VisCoT-13b-224",
62
+ "VisCoT-13B-336 (Best)": "deepcs233/VisCoT-13b-336",
63
+ }
64
+
65
+ MODEL_PATH = "deepcs233/VisCoT-7b-224" # Default: smallest/fastest
66
+ CURRENT_MODEL_NAME = "VisCoT-7B-224 (Fastest)"
67
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
68
 
69
+ # Benchmark datasets from HF collection
70
+ BENCHMARK_DATASETS = {
71
+ "GQA": "tuandunghcmut/gqa_cot",
72
+ "TextVQA": "tuandunghcmut/textvqa_cot",
73
+ "DocVQA": "tuandunghcmut/docvqa_cot",
74
+ "Flickr30K": "tuandunghcmut/flickr30k_cot",
75
+ "InfographicsVQA": "tuandunghcmut/infographicsvqa_cot",
76
+ }
 
 
 
 
77
 
78
  # =============================================================================
79
  # Model Loading (Global - bfloat16)
 
106
  print(f"✓ Device: {DEVICE}")
107
 
108
 
109
# =============================================================================
# Model Management Functions
# =============================================================================

def switch_model(model_choice):
    """Swap the globally loaded VisCoT checkpoint for a different variant.

    Args:
        model_choice: Display label; must be a key of AVAILABLE_MODELS.

    Returns:
        A human-readable status string (success or error) for the Gradio
        status textbox. Never raises: all failures are caught and returned
        as an error message.
    """
    global tokenizer, model, image_processor, context_len, MODEL_PATH, CURRENT_MODEL_NAME

    try:
        new_model_path = AVAILABLE_MODELS[model_choice]

        # No-op when the requested checkpoint is already active.
        if new_model_path == MODEL_PATH:
            return f"Already using {model_choice}"

        print(f"\n🔄 Switching to {model_choice}...")

        # Release the current model BEFORE loading the next one: the GPU
        # cannot hold two checkpoints (e.g. 7B + 13B) simultaneously, so
        # loading without freeing first would OOM. If the load below fails,
        # the demo is left without a model until a retry succeeds — an
        # acceptable trade-off for an interactive Space.
        import gc
        model = None
        gc.collect()
        if DEVICE == "cuda":
            torch.cuda.empty_cache()

        disable_torch_init()

        model_name = get_model_name_from_path(new_model_path)

        # Load new model
        tokenizer, model, image_processor, context_len = load_pretrained_model(
            new_model_path,
            None,
            model_name,
            load_8bit=False,
            load_4bit=False,
            device=DEVICE,
        )

        # Ensure bfloat16 on GPU, matching the initial global load.
        if DEVICE == "cuda":
            model = model.to(dtype=torch.bfloat16)

        MODEL_PATH = new_model_path
        CURRENT_MODEL_NAME = model_choice

        print(f"✓ Switched to {model_choice}")
        return f"✓ Successfully switched to {model_choice}\nModel: {model_name}\nDevice: {DEVICE}"

    except Exception as e:
        import traceback
        error_msg = f"❌ Failed to switch model: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)
        return error_msg
153
+
154
# =============================================================================
# Benchmark Loading Functions
# =============================================================================

def load_benchmark_example(dataset_name, index=0):
    """Load one example from a Visual-CoT benchmark dataset.

    Args:
        dataset_name: Display name; must be a key of BENCHMARK_DATASETS.
        index: Zero-based example index; out-of-range values wrap to 0.

    Returns:
        A 5-tuple ``(image, question, bbox, answer, info)``. On failure the
        image is None and the error message is placed in the question slot,
        so the tuple shape is identical on every path (required for Gradio
        output unpacking).
    """
    try:
        dataset_path = BENCHMARK_DATASETS.get(dataset_name)
        if not dataset_path:
            # Bug fix: this path previously returned only 4 values while
            # every other path returns 5, breaking callers that unpack five.
            return None, "Dataset not found", "", "", ""

        # Imported lazily so this guard path works even without `datasets`.
        from datasets import load_dataset

        # Load dataset
        dataset = load_dataset(dataset_path, split="train")

        # Clamp out-of-range indices back to the first example.
        if index >= len(dataset):
            index = 0

        example = dataset[index]

        # Extract fields (missing keys degrade to empty values).
        image = example.get("image")
        question = example.get("question", "")
        bbox = example.get("bbox", "")
        answer = example.get("answer", "")

        info = f"Dataset: {dataset_name} | Example {index + 1}/{len(dataset)}"

        return image, question, bbox, answer, info

    except Exception as e:
        error_msg = f"Error loading benchmark: {str(e)}"
        print(error_msg)
        return None, error_msg, "", "", ""
189
+
190
  # =============================================================================
191
  # Utility Functions
192
  # =============================================================================
 
411
  .header {
412
  text-align: center;
413
  padding: 20px;
414
+ background: linear-gradient(135deg, #475569 0%, #334155 100%);
415
  color: white;
416
  border-radius: 10px;
417
  margin-bottom: 20px;
 
443
 
444
  with gr.Blocks(
445
  theme=gr.themes.Soft(
446
+ primary_hue="slate",
447
+ secondary_hue="gray",
448
+ neutral_hue="slate",
449
  ),
450
  css=custom_css,
451
  title="Visual-CoT Demo"
 
492
  gr.HTML("""
493
  <div class="info-box">
494
  <p style="margin: 0; font-size: 14px;">
495
+ <strong>Note:</strong> This Space uses Zero GPU which requires authentication.
496
+ Please <a href="https://huggingface.co/login" target="_blank">login</a> or
497
+ <a href="https://huggingface.co/join" target="_blank">create a free account</a> if you encounter quota errors.
498
  </p>
499
  </div>
500
  """)
501
 
502
+ # Model Selector
503
+ with gr.Row():
504
+ with gr.Column(scale=2):
505
+ gr.Markdown("### Model Selection")
506
+ model_dropdown = gr.Dropdown(
507
+ choices=list(AVAILABLE_MODELS.keys()),
508
+ value=CURRENT_MODEL_NAME,
509
+ label="Select Model",
510
+ info="Choose model variant (larger = better quality, slower)"
511
+ )
512
+ with gr.Column(scale=1):
513
+ gr.Markdown("### Current Model Status")
514
+ model_status = gr.Textbox(
515
+ value=f"Active: {CURRENT_MODEL_NAME}",
516
+ label="Status",
517
+ interactive=False
518
+ )
519
+
520
+ model_dropdown.change(
521
+ fn=switch_model,
522
+ inputs=[model_dropdown],
523
+ outputs=[model_status]
524
+ )
525
+
526
  with gr.Tabs():
527
  # ============================================================
528
  # Tab 1: Interactive Demo
requirements.txt CHANGED
@@ -12,6 +12,7 @@ sentencepiece==0.1.99
12
  gradio # Latest version with all security updates
13
  spaces>=0.19.4
14
  huggingface_hub>=0.20.0 # For HF authentication and model downloads
 
15
 
16
  # Model dependencies
17
  accelerate==0.21.0
 
12
  gradio # Latest version with all security updates
13
  spaces>=0.19.4
14
  huggingface_hub>=0.20.0 # For HF authentication and model downloads
15
+ datasets>=2.14.0 # For loading benchmark datasets
16
 
17
  # Model dependencies
18
  accelerate==0.21.0