ColdSlim committed
Commit e79ec61 · verified · 1 Parent(s): 3ad2951

Update app.py

Files changed (1): app.py +27 -22
app.py CHANGED
@@ -1,9 +1,10 @@
 # app.py
 # Dermatology-AI-Assistant — HF Spaces (ZeroGPU)
-# - Uses AutoModelForCausalLM + trust_remote_code to avoid class import issues
-# - Robust FT->Base fallback on load/mismatch
-# - qwen-vl-utils for vision preprocessing
+# - Processor is ALWAYS from the official base (FT repo lacks a processor)
+# - Tries FT model first, then falls back to base on any load/generation error
+# - Uses qwen-vl-utils for Qwen2.5-VL vision preprocessing
 # - ZeroGPU only during inference
+# - No runtime pip; pin versions in requirements.txt
 
 import os
 import logging
@@ -31,23 +32,26 @@ GEN_KW = dict(
     temperature=0.7,
     top_p=0.9,
 )
-
 ZGPU_DURATION = int(os.environ.get("ZGPU_DURATION", "180"))
 
-# Preload only FT processor on CPU (we may swap to base in fallback)
-logger.info(f"Loading processor from: {FT_MODEL_ID}")
-ft_processor = AutoProcessor.from_pretrained(FT_MODEL_ID, trust_remote_code=True)
-logger.info("Processor loaded.")
+# ---------------------------
+# Processor (ALWAYS base)
+# ---------------------------
+logger.info(f"Loading processor from base model: {BASE_MODEL_ID}")
+base_processor = AutoProcessor.from_pretrained(BASE_MODEL_ID, trust_remote_code=True)
+logger.info("Processor loaded from base.")
 
 def _tune_image_processor(proc):
     if hasattr(proc, "image_processor"):
         try:
+            # Keep image size in a predictable range to stabilize placeholders/tiles.
             proc.image_processor.max_pixels = int(os.environ.get("QWEN_MAX_PIXELS", "1500000"))  # ~1.5MP
             proc.image_processor.min_pixels = int(os.environ.get("QWEN_MIN_PIXELS", "262144"))   # 512x512
+            # If the class exposes a splitting flag, prefer default behavior (do not force-disable globally).
         except Exception:
             pass
 
-_tune_image_processor(ft_processor)
+_tune_image_processor(base_processor)
 
 # ---------------------------
 # Helpers
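
Note: a quick sanity check on what those pixel bounds imply. The sketch below is illustrative only (the real resizing lives inside Qwen's image processor, which additionally snaps side lengths to multiples of the vision patch size), but it shows the aspect-preserving clamp into [min_pixels, max_pixels] that these env vars configure:

    import math

    def approx_target_size(w: int, h: int, min_pixels: int, max_pixels: int):
        """Scale (w, h) so w*h lands in [min_pixels, max_pixels], keeping aspect ratio."""
        pixels = w * h
        if pixels > max_pixels:
            scale = math.sqrt(max_pixels / pixels)
        elif pixels < min_pixels:
            scale = math.sqrt(min_pixels / pixels)
        else:
            scale = 1.0
        return round(w * scale), round(h * scale)

    # A 12MP photo (4000x3000) shrinks to roughly 1414x1061 under the 1.5MP default.
    print(approx_target_size(4000, 3000, 262144, 1_500_000))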
@@ -67,7 +71,7 @@ def build_inputs(processor: AutoProcessor, image: Image.Image, question: str):
     messages = _messages(image, question)
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs, video_inputs = process_vision_info(messages)
-    # single-sample no padding to avoid mask quirks
+    # Single-sample: no padding to avoid mask quirks.
     inputs = processor(text=[text], images=image_inputs, videos=video_inputs, return_tensors="pt")
     return inputs
 
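Note: _messages is not shown in this diff; for apply_chat_template and process_vision_info to work as called above, it presumably returns the standard Qwen2.5-VL chat structure. A minimal sketch, assuming a PIL image and a plain-text question:

    def _messages(image, question):
        # qwen-vl-utils accepts a PIL.Image directly in the "image" slot
        return [
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": image},
                    {"type": "text", "text": question},
                ],
            }
        ]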
@@ -78,7 +82,7 @@ def _pad_token_id(processor, model):
     return getattr(getattr(model, "config", None), "eos_token_id", 0)
 
 def _generate_text(model, processor, inputs: dict) -> str:
-    # move tensors to CUDA
+    # Move tensors to CUDA
     inputs = {k: v.to("cuda") if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
     with torch.no_grad():
         out_ids = model.generate(
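
Note: hardcoding "cuda" is safe here because this path only runs inside the @spaces.GPU context. A slightly more portable variant (an alternative sketch, not what this commit does) derives the device from the model instead:

    device = next(model.parameters()).device
    inputs = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}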
@@ -98,9 +102,9 @@ def format_derm_disclaimer(ans: str) -> str:
     )
     return ans + tail
 
-def try_load_model(model_id: str, *, allow_mismatch: bool = True) -> Tuple[Optional[AutoModelForCausalLM], Optional[str]]:
+def try_load_model(model_id: str, *, allow_mismatch: bool) -> Tuple[Optional[AutoModelForCausalLM], Optional[str]]:
     """
-    Load Qwen2.5-VL via AutoModelForCausalLM with trust_remote_code (works across transformers versions).
+    Load Qwen2.5-VL via AutoModelForCausalLM with trust_remote_code.
     """
     try:
         logger.info(f"Loading model on GPU: {model_id}")
@@ -108,7 +112,7 @@ def try_load_model(model_id: str, *, allow_mismatch: bool = True) -> Tuple[Optio
         model_id,
         torch_dtype=torch.float16,
         device_map="cuda",
-        trust_remote_code=True,  # let repo code provide the class if needed
+        trust_remote_code=True,
         low_cpu_mem_usage=True,
         ignore_mismatched_sizes=allow_mismatch,
         offload_state_dict=True,
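
Note: with ignore_mismatched_sizes=True, from_pretrained re-initializes any size-mismatched weights instead of raising, so a problematic FT checkpoint can load cleanly and only fail later at generation time; that is why the caller also catches the placeholder ValueError. The (model, error) return convention keeps call sites exception-free. A sketch of the intended shape at the call site:

    model, err = try_load_model(FT_MODEL_ID, allow_mismatch=True)
    if model is None:
        logger.warning(f"FT load failed: {err}")  # fall through to the base model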
@@ -125,21 +129,25 @@ def try_load_model(model_id: str, *, allow_mismatch: bool = True) -> Tuple[Optio
 @spaces.GPU(duration=ZGPU_DURATION)
 def analyze_skin_condition(image: Optional[Image.Image], question: str) -> str:
     """
-    Try FT model first; on ANY load/generation error, fall back to base model+processor.
+    Use base processor for both models.
+    Try FT model first; on ANY load/generation error, fall back to base model.
     """
     if image is None:
         return "❌ Please upload an image first."
 
     model = None
     try:
+        # Build inputs once (with base processor) — valid for both models
+        inputs = build_inputs(base_processor, image, question)
+
         # Attempt 1: fine-tuned model
         model, ft_err = try_load_model(FT_MODEL_ID, allow_mismatch=True)
         if model is not None:
             try:
-                inputs = build_inputs(ft_processor, image, question)
-                text = _generate_text(model, ft_processor, inputs)
+                text = _generate_text(model, base_processor, inputs)
                 return format_derm_disclaimer(text)
             except ValueError as ve:
+                # Known Qwen placeholder mismatch path — just fall back.
                 if "Image features and image tokens do not match" in str(ve):
                     logger.warning("Token/feature mismatch on FT model — falling back to base.")
                 else:
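
Note: building inputs once and reusing them for both attempts is safe because _generate_text copies tensors into a new dict when moving them to CUDA; the caller's CPU-side dict is never mutated. A minimal demonstration of the pattern (needs a CUDA device):

    import torch

    cpu_inputs = {"input_ids": torch.tensor([[1, 2, 3]])}
    gpu_inputs = {k: v.to("cuda") if isinstance(v, torch.Tensor) else v
                  for k, v in cpu_inputs.items()}
    assert cpu_inputs["input_ids"].device.type == "cpu"  # original is untouched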
@@ -155,15 +163,12 @@ def analyze_skin_condition(image: Optional[Image.Image], question: str) -> str:
             model = None
             torch.cuda.empty_cache()
 
-        # Attempt 2: base model + its processor
-        base_processor = AutoProcessor.from_pretrained(BASE_MODEL_ID, trust_remote_code=True)
-        _tune_image_processor(base_processor)
+        # Attempt 2: base model
         model, base_err = try_load_model(BASE_MODEL_ID, allow_mismatch=False)
         if model is None:
             return f"❌ Error loading models.\n- FT: {ft_err}\n- BASE: {base_err}"
 
-        base_inputs = build_inputs(base_processor, image, question)
-        text = _generate_text(model, base_processor, base_inputs)
+        text = _generate_text(model, base_processor, inputs)
         return format_derm_disclaimer(text)
 
     except Exception as e:
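
For reference, a minimal smoke test of the entry point (the image path is hypothetical; the Gradio wiring is outside this diff):

    from PIL import Image

    img = Image.open("example_lesion.jpg")  # hypothetical local file
    print(analyze_skin_condition(img, "What skin condition might this be?"))
    print(analyze_skin_condition(None, "anything"))  # -> "❌ Please upload an image first."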
 