Update app.py
app.py
CHANGED
@@ -1,9 +1,10 @@
 # app.py
 # Dermatology-AI-Assistant — HF Spaces (ZeroGPU, Qwen2.5-VL + LoRA adapters)
-# - Loads base model, then applies LoRA/PEFT adapters from MODEL_ID
+# - Loads base model, then applies LoRA/PEFT adapters from MODEL_ID (kept active; not merged)
 # - Uses qwen-vl-utils + AutoProcessor (multimodal) with trust_remote_code, use_fast=False
 # - Deterministic decoding for stable eval
-# - ZeroGPU only during inference
+# - ZeroGPU only during inference (ALL CUDA work happens inside @spaces.GPU functions)
+# - Includes a ZeroGPU-safe debug tool: "LoRA ON vs OFF" comparison
 
 import os
 import logging
@@ -13,7 +14,7 @@ import gradio as gr
 import spaces
 import torch
 from PIL import Image
-from peft import PeftModel  #
+from peft import PeftModel  # LoRA/PEFT
 from transformers import AutoProcessor, AutoModelForVision2Seq
 from qwen_vl_utils import process_vision_info
 
@@ -47,7 +48,7 @@ def _load_multimodal_processor() -> AutoProcessor:
     accepts_images = ("images" in str(sig)) if sig else hasattr(proc, "image_processor")
     if accepts_images and hasattr(proc, "image_processor"):
         logger.info(f"Loaded multimodal processor from: {mid} ({proc.__class__.__name__})")
-        #
+        # Optional: stabilize tiling
         try:
             proc.image_processor.max_pixels = int(os.environ.get("QWEN_MAX_PIXELS", "1500000"))
             proc.image_processor.min_pixels = int(os.environ.get("QWEN_MIN_PIXELS", "262144"))
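For context on the `QWEN_MAX_PIXELS` / `QWEN_MIN_PIXELS` overrides touched in this hunk, here is a minimal sketch of how the Qwen2.5-VL image processor's pixel budget is capped. The checkpoint id is an assumption, not the Space's configured model.

```python
# Minimal sketch: cap the vision tiling budget on a Qwen2.5-VL processor.
import os
from transformers import AutoProcessor

proc = AutoProcessor.from_pretrained(
    "Qwen/Qwen2.5-VL-7B-Instruct",  # assumed checkpoint; the Space uses its own model id
    trust_remote_code=True,
    use_fast=False,
)
# Fewer pixels -> fewer vision tokens -> faster, cheaper inference (and vice versa).
proc.image_processor.max_pixels = int(os.environ.get("QWEN_MAX_PIXELS", "1500000"))
proc.image_processor.min_pixels = int(os.environ.get("QWEN_MIN_PIXELS", "262144"))
print(proc.image_processor.max_pixels, proc.image_processor.min_pixels)
```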
@@ -85,6 +86,10 @@ def build_inputs(image: Image.Image, question: str):
     messages = _messages(image, question)
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs, video_inputs = process_vision_info(messages)
+    logger.info(
+        f"vision: images={len(image_inputs) if image_inputs is not None else 0}, "
+        f"first_shape={getattr(image_inputs[0], 'shape', None) if image_inputs else None}"
+    )
     return processor(text=[text], images=image_inputs, videos=video_inputs, return_tensors="pt")
 
 def _pad_token_id(model):
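The logging added above sits in the standard qwen-vl-utils preprocessing flow. A sketch of that flow, reusing `proc` from the previous sketch (the prompt text and placeholder image are illustrative):

```python
# Sketch of the apply_chat_template + process_vision_info path used by build_inputs().
from PIL import Image
from qwen_vl_utils import process_vision_info

image = Image.new("RGB", (448, 448), "white")  # placeholder image
messages = [{
    "role": "user",
    "content": [
        {"type": "image", "image": image},
        {"type": "text", "text": "Describe the visible skin finding."},  # illustrative prompt
    ],
}]
text = proc.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
image_inputs, video_inputs = process_vision_info(messages)
batch = proc(text=[text], images=image_inputs, videos=video_inputs, return_tensors="pt")
print(batch["input_ids"].shape)
```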
@@ -92,6 +97,7 @@ def _pad_token_id(model):
     return tid if tid is not None else (getattr(getattr(model, "config", None), "eos_token_id", 0) or 0)
 
 def _generate_text(model, inputs: dict) -> str:
+    # IMPORTANT: This is called only inside GPU-decorated functions.
     inputs = {k: v.to("cuda") if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
     with torch.no_grad():
         out_ids = model.generate(**inputs, **GEN_KW, pad_token_id=_pad_token_id(model))
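`_generate_text` expands `GEN_KW`, which the file header describes as deterministic decoding. The actual values are defined elsewhere in app.py and are not shown in this diff; a placeholder of what such a dict typically looks like:

```python
# Placeholder GEN_KW (values are illustrative, not the Space's actual settings):
# greedy decoding gives the same output for the same input, which keeps eval stable.
GEN_KW = {
    "max_new_tokens": 512,  # illustrative cap
    "do_sample": False,     # greedy decoding
    "num_beams": 1,
}
```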
@@ -109,6 +115,7 @@ def format_derm_disclaimer(ans: str) -> str:
 
 # ---------------------------
 # Model loading (LoRA first, then full weights fallback, then base)
+# NOTE: Do NOT call this outside a @spaces.GPU function, because it loads to CUDA.
 # ---------------------------
 def try_load_model():
     """
@@ -127,7 +134,13 @@ def try_load_model():
         )
         logger.info(f"Attaching LoRA adapters from: {FT_MODEL_ID}")
         model = PeftModel.from_pretrained(base, FT_MODEL_ID, is_trainable=False)
-        #
+        # Log adapter visibility
+        try:
+            if hasattr(model, "get_active_adapters"):
+                logger.info(f"Active adapters: {model.get_active_adapters()}")
+            logger.info(f"PEFT config present: {hasattr(model, 'peft_config')}")
+        except Exception:
+            pass
         logger.info("LoRA adapters attached and active (not merged).")
         model.eval()
         return model, None
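A sketch of the load path this hunk instruments: base checkpoint plus LoRA adapters kept separate (not merged), as in `try_load_model()`. Both repo ids below are placeholders; the real ones come from the Space's BASE/FT environment settings.

```python
# Sketch: attach LoRA adapters to a vision-language base model with PEFT.
import torch
from peft import PeftModel
from transformers import AutoModelForVision2Seq

base = AutoModelForVision2Seq.from_pretrained(
    "Qwen/Qwen2.5-VL-7B-Instruct",      # assumed base checkpoint
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
model = PeftModel.from_pretrained(
    base,
    "your-org/derm-lora-adapters",      # placeholder adapter repo
    is_trainable=False,
)
print("peft_config present:", hasattr(model, "peft_config"))
model.eval()
```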
@@ -150,7 +163,7 @@ def try_load_model():
     except Exception as e:
         logger.warning(f"Full FT load failed: {e}")
 
-    # 3) Final fallback: base only
+    # 3) Final fallback: base only (keep app usable)
     try:
         logger.info("Falling back to BASE model only.")
         model = AutoModelForVision2Seq.from_pretrained(
@@ -167,19 +180,27 @@ def try_load_model():
 
 def compare_with_without_lora(model, inputs):
     """
-    Returns (with_lora_text, without_lora_text).
+    Returns (with_lora_text, without_lora_text).
+    Requires adapters active. Tries disable/enable; falls back to set_adapter([]) if available.
     """
-    #
+    # WITH LoRA
     with_lora = _generate_text(model, inputs)
 
-    #
+    # WITHOUT LoRA
     without_lora = "[Adapters could not be toggled on this model]"
-
-
+    try:
+        if hasattr(model, "disable_adapter") and hasattr(model, "enable_adapter"):
             model.disable_adapter()
             without_lora = _generate_text(model, inputs)
-    finally:
             model.enable_adapter()
+        elif hasattr(model, "set_adapter"):
+            current = model.get_active_adapters() if hasattr(model, "get_active_adapters") else None
+            model.set_adapter([])  # deactivate all
+            without_lora = _generate_text(model, inputs)
+            if current:
+                model.set_adapter(current)
+    except Exception as e:
+        logger.warning(f"Adapter toggle failed: {e}")
 
     return with_lora, without_lora
 
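One nuance worth noting for this hunk: in the PEFT releases I am aware of, `PeftModel.disable_adapter()` is a context manager rather than a persistent toggle, so an equivalent ON/OFF comparison can also be written with a `with` block. This is a sketch under that assumption; `_generate_text`, `model`, and `inputs` are the app's own objects from above.

```python
from contextlib import nullcontext

def compare_on_off(model, inputs):
    """Generate once with adapters active, once with them bypassed."""
    with_lora = _generate_text(model, inputs)
    # disable_adapter() yields a context in which LoRA layers are bypassed;
    # they are restored automatically when the block exits.
    ctx = model.disable_adapter() if hasattr(model, "disable_adapter") else nullcontext()
    with ctx:
        without_lora = _generate_text(model, inputs)
    return with_lora, without_lora
```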
@@ -193,7 +214,7 @@ def analyze_skin_condition(image: Optional[Image.Image], question: str) -> str:
     model = None
     try:
         inputs = build_inputs(image, question)
-        model, warn = try_load_model()
+        model, warn = try_load_model()  # SAFE: inside GPU context
         if model is None:
             return "❌ Could not load any model (see logs)."
         if warn:
@@ -208,6 +229,34 @@ def analyze_skin_condition(image: Optional[Image.Image], question: str) -> str:
             del model
             torch.cuda.empty_cache()
 
+# ---------------------------
+# Debug (ZeroGPU-safe): LoRA ON vs OFF comparison
+# ---------------------------
+@spaces.GPU(duration=ZGPU_DURATION)
+def debug_compare_lora(image: Optional[Image.Image], question: str) -> str:
+    if image is None:
+        return "Please upload an image first."
+    model = None
+    try:
+        inputs = build_inputs(image, question)
+        model, warn = try_load_model()  # SAFE: inside GPU context
+        if model is None:
+            return f"Load error: {warn}"
+        if warn:
+            logger.warning(warn)
+        on_text, off_text = compare_with_without_lora(model, inputs)
+        return (
+            "=== LoRA ON ===\n" + on_text +
+            "\n\n=== LoRA OFF ===\n" + off_text
+        )
+    except Exception as e:
+        logger.exception("Debug compare failed")
+        return f"Debug error: {e}"
+    finally:
+        if model is not None:
+            del model
+            torch.cuda.empty_cache()
+
 # ---------------------------
 # UI
 # ---------------------------
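The new `debug_compare_lora` follows the same ZeroGPU rule as `analyze_skin_condition`: CUDA is touched only inside a `@spaces.GPU`-decorated function. A minimal standalone illustration of that pattern (the duration value is illustrative, not the Space's ZGPU_DURATION):

```python
import spaces
import torch

@spaces.GPU(duration=60)  # seconds of GPU lease per call (illustrative)
def gpu_probe() -> str:
    # Allocate on CUDA only inside the decorated function.
    x = torch.randn(512, 512, device="cuda")
    return f"CUDA ok, matmul norm = {float((x @ x).norm()):.1f}"
```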
@@ -232,34 +281,14 @@ def create_interface() -> gr.Blocks:
         submit_btn.click(fn=analyze_skin_condition, inputs=[image_input, question_input], outputs=output_box, queue=True)
         clear_btn.click(fn=lambda: (None, ""), inputs=None, outputs=[image_input, question_input])
 
-
-        gr.Markdown("Tips: Ensure good lighting and focus. Avoid uploading personally identifying information.")
-
+        # Debug: LoRA ON vs OFF (GPU-decorated function)
         with gr.Row():
             debug_btn = gr.Button("Debug: Compare LoRA ON vs OFF")
             debug_out = gr.Textbox(label="Debug Output", lines=14)
-
-        def _debug_compare(image, question):
-            if image is None:
-                return "Please upload an image first."
-            try:
-                inputs = build_inputs(image, question)
-                model, warn = try_load_model()
-                if model is None:
-                    return f"Load error: {warn}"
-                if warn:
-                    logger.warning(warn)
-                on_text, off_text = compare_with_without_lora(model, inputs)
-                return (
-                    "=== LoRA ON ===\n" + on_text +
-                    "\n\n=== LoRA OFF ===\n" + off_text
-                )
-            except Exception as e:
-                logger.exception("Debug compare failed")
-                return f"Debug error: {e}"
-
-        debug_btn.click(_debug_compare, [image_input, question_input], debug_out, queue=True)
+        debug_btn.click(fn=debug_compare_lora, inputs=[image_input, question_input], outputs=debug_out, queue=True)
 
+    demo.queue()
+    gr.Markdown("Tips: Ensure good lighting and focus. Avoid uploading personally identifying information.")
     return demo
 
 def main():
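This hunk replaces the inline `_debug_compare` closure with the module-level, GPU-decorated `debug_compare_lora`, so the debug path also runs under a `@spaces.GPU` lease. A stripped-down sketch of the resulting wiring; the handler body here is a stand-in for the real GPU-decorated function:

```python
import gradio as gr

def debug_compare_lora(image, question):  # stand-in for the GPU-decorated handler
    return f"image provided: {image is not None}; question: {question!r}"

with gr.Blocks() as demo:
    image_input = gr.Image(type="pil", label="Image")
    question_input = gr.Textbox(label="Question")
    debug_btn = gr.Button("Debug: Compare LoRA ON vs OFF")
    debug_out = gr.Textbox(label="Debug Output", lines=14)
    debug_btn.click(fn=debug_compare_lora, inputs=[image_input, question_input],
                    outputs=debug_out, queue=True)

demo.queue()  # enable the request queue, as create_interface() now does
```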
@@ -271,7 +300,7 @@ def main():
         show_error=True,
         inbrowser=False,
         quiet=False,
-        ssr_mode=False,
+        ssr_mode=False,  # avoid Node requirement in container
     )
 
 if __name__ == "__main__":