Update app.py
app.py CHANGED
@@ -4,7 +4,7 @@
 # - Uses qwen-vl-utils + AutoProcessor (multimodal) with trust_remote_code, use_fast=False
 # - Deterministic decoding for stable eval
 # - ZeroGPU only during inference (ALL CUDA work happens inside @spaces.GPU functions)
-# -
+# - Debug tools: (1) LoRA ON vs OFF toggle; (2) LoRA ON vs pure BASE
 
 import os
 import logging
@@ -139,6 +139,8 @@ def try_load_model():
         if hasattr(model, "get_active_adapters"):
             logger.info(f"Active adapters: {model.get_active_adapters()}")
         logger.info(f"PEFT config present: {hasattr(model, 'peft_config')}")
+        if hasattr(model, "peft_config"):
+            logger.info(f"PEFT keys: {list(model.peft_config.keys())}")
     except Exception:
         pass
     logger.info("LoRA adapters attached and active (not merged).")
@@ -204,6 +206,21 @@ def compare_with_without_lora(model, inputs):
 
     return with_lora, without_lora
 
+def load_base_only_model():
+    """
+    Load a fresh BASE model (no adapters) for direct comparison.
+    Call ONLY inside GPU-decorated functions.
+    """
+    model = AutoModelForVision2Seq.from_pretrained(
+        BASE_MODEL_ID,
+        torch_dtype=torch.float16,
+        device_map="cuda",
+        trust_remote_code=True,
+        low_cpu_mem_usage=True,
+    )
+    model.eval()
+    return model
+
 # ---------------------------
 # Inference (ZeroGPU)
 # ---------------------------
@@ -257,6 +274,42 @@ def debug_compare_lora(image: Optional[Image.Image], question: str) -> str:
         del model
         torch.cuda.empty_cache()
 
+# ---------------------------
+# Debug (ZeroGPU-safe): LoRA ON vs pure BASE comparison
+# ---------------------------
+@spaces.GPU(duration=ZGPU_DURATION)
+def debug_compare_vs_base(image: Optional[Image.Image], question: str) -> str:
+    if image is None:
+        return "Please upload an image first."
+    lo_model = None
+    base_model = None
+    try:
+        inputs = build_inputs(image, question)
+
+        lo_model, warn = try_load_model()
+        if lo_model is None:
+            return f"Load error (LoRA): {warn}"
+        if warn:
+            logger.warning(warn)
+
+        base_model = load_base_only_model()
+
+        lora_text = _generate_text(lo_model, inputs)
+        base_text = _generate_text(base_model, inputs)
+
+        return (
+            "=== LoRA ON (adapters attached) ===\n" + lora_text +
+            "\n\n=== BASE ONLY ===\n" + base_text
+        )
+    except Exception as e:
+        logger.exception("Debug compare vs base failed")
+        return f"Debug error: {e}"
+    finally:
+        for m in (lo_model, base_model):
+            if m is not None:
+                del m
+        torch.cuda.empty_cache()
+
 # ---------------------------
 # UI
 # ---------------------------
@@ -283,9 +336,11 @@ def create_interface() -> gr.Blocks:
 
         # Debug: LoRA ON vs OFF (GPU-decorated function)
         with gr.Row():
-
-
-
+            debug_btn1 = gr.Button("Debug: Compare LoRA ON vs OFF")
+            debug_btn2 = gr.Button("Debug: Compare LoRA ON vs BASE")
+            debug_out = gr.Textbox(label="Debug Output", lines=16)
+            debug_btn1.click(fn=debug_compare_lora, inputs=[image_input, question_input], outputs=debug_out, queue=True)
+            debug_btn2.click(fn=debug_compare_vs_base, inputs=[image_input, question_input], outputs=debug_out, queue=True)
 
         demo.queue()
         gr.Markdown("Tips: Ensure good lighting and focus. Avoid uploading personally identifying information.")
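Note on context: `build_inputs` is called by the new `debug_compare_vs_base` but is defined outside this diff. A minimal sketch of what it plausibly does, assuming the standard qwen-vl-utils + AutoProcessor flow named in the header comment; the model ID shown is a placeholder, not taken from this commit:

# Hypothetical sketch of build_inputs (not shown in this diff).
from PIL import Image
from qwen_vl_utils import process_vision_info
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained(
    "Qwen/Qwen2-VL-2B-Instruct",  # assumption: the real BASE_MODEL_ID is not visible here
    trust_remote_code=True,
    use_fast=False,
)

def build_inputs(image: Image.Image, question: str):
    # One user turn containing the image and the question
    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": question},
        ],
    }]
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    # Returned on CPU; the @spaces.GPU functions move it to CUDA before generate
    return processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )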
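`_generate_text` is likewise not shown. Given the header's "Deterministic decoding for stable eval", it likely wraps greedy generation; a sketch under that assumption, where `max_new_tokens=256` is a guess and `processor` is the one from the sketch above:

# Hypothetical sketch of _generate_text (not shown in this diff).
import torch

def _generate_text(model, inputs, max_new_tokens: int = 256) -> str:
    inputs = inputs.to(model.device)
    with torch.inference_mode():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,  # greedy decoding: same inputs -> same outputs
        )
    # Strip the prompt tokens so only the generated answer is decoded
    trimmed = [out[len(inp):] for inp, out in zip(inputs["input_ids"], output_ids)]
    return processor.batch_decode(trimmed, skip_special_tokens=True)[0]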
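Finally, `compare_with_without_lora` (whose signature appears in the @@ -204 hunk header) most likely toggles the adapters in place rather than reloading weights; PEFT models expose a `disable_adapter()` context manager for exactly this. A sketch, not the commit's actual implementation:

# Hypothetical sketch of compare_with_without_lora (body not in this diff).
def compare_with_without_lora(model, inputs):
    with_lora = _generate_text(model, inputs)
    if hasattr(model, "disable_adapter"):
        # Temporarily bypasses LoRA layers without unloading them
        with model.disable_adapter():
            without_lora = _generate_text(model, inputs)
    else:
        without_lora = "(model has no adapters to disable)"
    return with_lora, without_lora

This in-place toggle is what distinguishes the existing "LoRA ON vs OFF" button from the new `debug_compare_vs_base`, which loads a second, adapter-free copy of the base model for a fully independent comparison.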