Spaces:

ColdSlim
/

Dermatology-AI-Assistant

Sleeping

App Files Files Community

ColdSlim committed on Oct 13, 2025

Commit

49e8446

verified ·

1 Parent(s): 8b9a9ad

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -42

app.py CHANGED Viewed

@@ -1,13 +1,13 @@
 # app.py
-# Dermatology-AI-Assistant—HF Spaces (ZeroGPU)
-# - Tries fine-tuned model first; on load/mismatch errors, falls back to base
 # - Uses qwen-vl-utils for vision preprocessing
-# - Acquires ZeroGPU only during inference
 # - No runtime pip; pin versions in requirements.txt
 import os
 import logging
-from typing import Optional
 import gradio as gr
 import spaces
@@ -34,7 +34,7 @@ GEN_KW = dict(
 ZGPU_DURATION = int(os.environ.get("ZGPU_DURATION", "180"))
-# Preload only FT processor on CPU; we may swap to base processor in fallback
 logger.info(f"Loading processor from: {FT_MODEL_ID}")
 ft_processor = AutoProcessor.from_pretrained(FT_MODEL_ID, trust_remote_code=True)
 logger.info("Processor loaded.")
@@ -67,7 +67,7 @@ def build_inputs(processor: AutoProcessor, image: Image.Image, question: str):
     messages = _messages(image, question)
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs, video_inputs = process_vision_info(messages)
-    # no padding for single sample to avoid mask quirks
     inputs = processor(text=[text], images=image_inputs, videos=video_inputs, return_tensors="pt")
     return inputs
@@ -98,65 +98,73 @@ def format_derm_disclaimer(ans: str) -> str:
     )
     return ans + tail
 # ---------------------------
 # Inference (ZeroGPU)
 # ---------------------------
 @spaces.GPU(duration=ZGPU_DURATION)
 def analyze_skin_condition(image: Optional[Image.Image], question: str) -> str:
     """
-    Try fine-tuned model first; if load or token/feature mismatch occurs, fall back to base model+processor.
     """
     if image is None:
         return "❌ Please upload an image first."
-    model = None  # ensure defined for finally block
     try:
-        # ------- Attempt 1: Fine-tuned model -------
-        try:
-            logger.info(f"Loading fine-tuned model on GPU: {FT_MODEL_ID}")
-            model = Qwen2VLForConditionalGeneration.from_pretrained(
-                FT_MODEL_ID,
-                torch_dtype=torch.float16,
-                device_map="cuda",
-                trust_remote_code=True,
-                low_cpu_mem_usage=True,
-                ignore_mismatched_sizes=True,  # allow partial head diffs
-                # offload_state_dict can help with odd shards during load
-                offload_state_dict=True,
-            )
-            logger.info("Fine-tuned model loaded.")
-            inputs = build_inputs(ft_processor, image, question)
             try:
                 text = _generate_text(model, ft_processor, inputs)
                 return format_derm_disclaimer(text)
             except ValueError as ve:
-                # Qwen2-VL edge case: placeholder token vs feature mismatch
                 if "Image features and image tokens do not match" in str(ve):
-                    logger.warning("Token/feature mismatch on FT model — switching to base model.")
                 else:
-                    raise
-        except Exception as e:
-            # Any FT load error (e.g., Linear size mismatch) triggers fallback
-            logger.warning(f"Fine-tuned model load failed: {e}. Falling back to base model.")
-        # ------- Attempt 2: Base model & its processor -------
-        # Free FT before loading base
         if model is not None:
             del model
             model = None
             torch.cuda.empty_cache()
-        logger.info(f"Loading BASE model on GPU: {BASE_MODEL_ID}")
         base_processor = AutoProcessor.from_pretrained(BASE_MODEL_ID, trust_remote_code=True)
         _tune_image_processor(base_processor)
-        model = Qwen2VLForConditionalGeneration.from_pretrained(
-            BASE_MODEL_ID,
-            torch_dtype=torch.float16,
-            device_map="cuda",
-            trust_remote_code=True,
-            low_cpu_mem_usage=True,
-        )
-        logger.info("Base model loaded.")
         base_inputs = build_inputs(base_processor, image, question)
         text = _generate_text(model, base_processor, base_inputs)
         return format_derm_disclaimer(text)
@@ -196,7 +204,7 @@ def create_interface() -> gr.Blocks:
         submit_btn.click(fn=analyze_skin_condition, inputs=[image_input, question_input], outputs=output_box, queue=True)
         clear_btn.click(fn=lambda: (None, ""), inputs=None, outputs=[image_input, question_input])
-        # Gradio 4.44.1: simple queue call, no kwargs
         demo.queue()
         gr.Markdown("Tips: Ensure good lighting and focus. Avoid uploading personally identifying information.")

 # app.py
+# Dermatology-AI-Assistant — HF Spaces (ZeroGPU)
+# - Robust FT->Base fallback on ANY model load error (incl. Linear size mismatch)
 # - Uses qwen-vl-utils for vision preprocessing
+# - ZeroGPU only during inference
 # - No runtime pip; pin versions in requirements.txt
 import os
 import logging
+from typing import Optional, Tuple
 import gradio as gr
 import spaces
 ZGPU_DURATION = int(os.environ.get("ZGPU_DURATION", "180"))
+# Preload only the FT processor on CPU (we may swap to base processor if we fall back)
 logger.info(f"Loading processor from: {FT_MODEL_ID}")
 ft_processor = AutoProcessor.from_pretrained(FT_MODEL_ID, trust_remote_code=True)
 logger.info("Processor loaded.")
     messages = _messages(image, question)
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs, video_inputs = process_vision_info(messages)
+    # single-sample: no padding to avoid mask quirks
     inputs = processor(text=[text], images=image_inputs, videos=video_inputs, return_tensors="pt")
     return inputs
     )
     return ans + tail
+def try_load_model(model_id: str, *, allow_mismatch: bool = True) -> Tuple[Optional[Qwen2VLForConditionalGeneration], Optional[str]]:
+    """
+    Attempt to load a Qwen2-VL model and report failure as a value instead of raising.
+
+    Args:
+        model_id: Identifier passed straight to
+            ``Qwen2VLForConditionalGeneration.from_pretrained``.
+        allow_mismatch: Keyword-only. Forwarded as ``ignore_mismatched_sizes``.
+
+    Returns:
+        ``(model, None)`` when loading succeeds, or ``(None, error_message)`` when
+        ``from_pretrained`` raises, where ``error_message`` is ``str(exception)``.
+
+    Only ``Exception`` subclasses are captured; ``BaseException``s such as
+    ``KeyboardInterrupt`` still propagate. Start, success and failure are each
+    logged through the module-level ``logger``.
+    """
+    try:
+        logger.info(f"Loading model on GPU: {model_id}")
+        # Load in float16 with device_map="cuda"; the remaining kwargs are
+        # passed through unchanged to from_pretrained.
+        model = Qwen2VLForConditionalGeneration.from_pretrained(
+            model_id,
+            torch_dtype=torch.float16,
+            device_map="cuda",
+            trust_remote_code=True,
+            low_cpu_mem_usage=True,
+            ignore_mismatched_sizes=allow_mismatch,  # let FT load even if some heads differ
+            offload_state_dict=True,                 # helps load large shards reliably
+        )
+        logger.info(f"Model loaded: {model_id}")
+        return model, None
+    except Exception as e:
+        # Deliberately broad: the failure is logged as a warning and returned as
+        # text rather than re-raised, so the caller decides what to do next.
+        logger.warning(f"Model load failed for {model_id}: {e}")
+        return None, str(e)
 # ---------------------------
 # Inference (ZeroGPU)
 # ---------------------------
 @spaces.GPU(duration=ZGPU_DURATION)
 def analyze_skin_condition(image: Optional[Image.Image], question: str) -> str:
     """
+    Try FT model first; on ANY load error (e.g., Linear size mismatch), fall back to base model+processor.
     """
     if image is None:
         return "❌ Please upload an image first."
+    model = None
     try:
+        # Attempt 1: fine-tuned model
+        model, ft_err = try_load_model(FT_MODEL_ID, allow_mismatch=True)
+        if model is not None:
             try:
+                inputs = build_inputs(ft_processor, image, question)
                 text = _generate_text(model, ft_processor, inputs)
                 return format_derm_disclaimer(text)
             except ValueError as ve:
                 if "Image features and image tokens do not match" in str(ve):
+                    logger.warning("Token/feature mismatch on FT model — falling back to base.")
                 else:
+                    # Unexpected generation error on FT; fall back anyway
+                    logger.warning(f"FT generation error: {ve}. Falling back to base.")
+            except Exception as gen_e:
+                logger.warning(f"FT generation failed: {gen_e}. Falling back to base.")
+        else:
+            logger.warning(f"FT model unavailable, error: {ft_err}. Falling back to base.")
+        # Free FT model (if any) before loading base
         if model is not None:
             del model
             model = None
             torch.cuda.empty_cache()
+        # Attempt 2: base model + its processor
         base_processor = AutoProcessor.from_pretrained(BASE_MODEL_ID, trust_remote_code=True)
         _tune_image_processor(base_processor)
+        model, base_err = try_load_model(BASE_MODEL_ID, allow_mismatch=False)
+        if model is None:
+            # Both loads failed — report combined error
+            return f"❌ Error loading models.\n- FT: {ft_err}\n- BASE: {base_err}"
         base_inputs = build_inputs(base_processor, image, question)
         text = _generate_text(model, base_processor, base_inputs)
         return format_derm_disclaimer(text)
         submit_btn.click(fn=analyze_skin_condition, inputs=[image_input, question_input], outputs=output_box, queue=True)
         clear_btn.click(fn=lambda: (None, ""), inputs=None, outputs=[image_input, question_input])
+        # Gradio 4.44.1: simple queue() call, no kwargs
         demo.queue()
         gr.Markdown("Tips: Ensure good lighting and focus. Avoid uploading personally identifying information.")