Spaces:

ColdSlim
/

Dermatology-AI-Assistant

Sleeping

App Files Files Community

ColdSlim committed on Oct 13, 2025

Commit

a79b20b

verified ·

1 Parent(s): 25a237f

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -34

app.py CHANGED Viewed

@@ -1,5 +1,8 @@
 # app.py
 # Dermatology-AI-Assistant — Hugging Face Space (ZeroGPU-ready)
 import os
 import logging
@@ -22,29 +25,33 @@ logger = logging.getLogger(__name__)
 # Config
 # ---------------------------
 MODEL_ID = os.environ.get("MODEL_ID", "ColdSlim/Dermatology-Qwen2.5-VL-3B")
 GEN_KW = dict(
     max_new_tokens=512,
     do_sample=True,
     temperature=0.7,
     top_p=0.9,
 )
 ZGPU_DURATION = int(os.environ.get("ZGPU_DURATION", "180"))
 logger.info(f"Loading processor from: {MODEL_ID}")
 processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
 logger.info("Processor loaded.")
 # ---------------------------
 # Helpers
 # ---------------------------
-def build_inputs(image: Image.Image, question: str):
-    """
-    Build Qwen-style multimodal chat inputs using qwen-vl-utils.
-    Returns a dict of tensors ready for model.generate.
-    """
-    messages = [
         {
             "role": "user",
             "content": [
@@ -54,16 +61,45 @@ def build_inputs(image: Image.Image, question: str):
         }
     ]
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs, video_inputs = process_vision_info(messages)
-    inputs = processor(
-        text=[text],
-        images=image_inputs,
-        videos=video_inputs,
-        padding=True,
-        return_tensors="pt",
-    )
     return inputs
 def format_derm_disclaimer(ans: str) -> str:
@@ -82,9 +118,11 @@ def analyze_skin_condition(image: Optional[Image.Image], question: str) -> str:
     """
     Runs inside a ZeroGPU reservation window.
     Loads model on GPU, generates, frees VRAM.
     """
     if image is None:
         return "❌ Please upload an image first."
     try:
         logger.info(f"Loading model on GPU: {MODEL_ID}")
         model = Qwen2VLForConditionalGeneration.from_pretrained(
@@ -93,26 +131,40 @@ def analyze_skin_condition(image: Optional[Image.Image], question: str) -> str:
             device_map="cuda",
             trust_remote_code=True,
             low_cpu_mem_usage=True,
-            ignore_mismatched_sizes=True,  # keep until your weights match exactly
         )
         logger.info("Model loaded successfully!")
-        inputs = build_inputs(image, question)
-        inputs = {k: v.to("cuda") if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
-        with torch.no_grad():
-            out_ids = model.generate(
-                **inputs,
-                **GEN_KW,
-                pad_token_id=processor.tokenizer.eos_token_id,
-            )
-        # strip prompt tokens before decoding
-        trimmed = [o[len(i):] for i, o in zip(inputs["input_ids"], out_ids)]
-        text = processor.batch_decode(
-            trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
-        )[0]
         del model
         torch.cuda.empty_cache()
@@ -149,7 +201,7 @@ def create_interface() -> gr.Blocks:
         submit_btn.click(fn=analyze_skin_condition, inputs=[image_input, question_input], outputs=output_box, queue=True)
         clear_btn.click(fn=lambda: (None, ""), inputs=None, outputs=[image_input, question_input])
-        # Gradio 4.44.1: call queue() with no keyword args
         demo.queue()
         gr.Markdown("Tips: Ensure good lighting and focus. Avoid uploading personally identifying information.")
@@ -164,7 +216,7 @@ def main():
         show_error=True,
         inbrowser=False,
         quiet=False,
-        ssr_mode=False,  # disable SSR to avoid Node 20 requirement in container
     )
 if __name__ == "__main__":

 # app.py
 # Dermatology-AI-Assistant — Hugging Face Space (ZeroGPU-ready)
+# - Uses qwen-vl-utils for vision inputs
+# - Acquires ZeroGPU only during inference
+# - Handles Qwen2-VL token/feature mismatch with a safe fallback retry
 import os
 import logging
 # Config
 # ---------------------------
 MODEL_ID = os.environ.get("MODEL_ID", "ColdSlim/Dermatology-Qwen2.5-VL-3B")
 GEN_KW = dict(
     max_new_tokens=512,
     do_sample=True,
     temperature=0.7,
     top_p=0.9,
 )
 ZGPU_DURATION = int(os.environ.get("ZGPU_DURATION", "180"))
 logger.info(f"Loading processor from: {MODEL_ID}")
 processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
+# Optional: clamp the image pixel budget to reduce tiling variance (override via QWEN_MAX_PIXELS / QWEN_MIN_PIXELS).
+if hasattr(processor, "image_processor"):
+    try:
+        # Keep images within a predictable pixel band so placeholder count is stable.
+        processor.image_processor.max_pixels = int(os.environ.get("QWEN_MAX_PIXELS", "1500000"))  # ~1.5MP
+        processor.image_processor.min_pixels = int(os.environ.get("QWEN_MIN_PIXELS", "262144"))   # 512x512
+    except Exception:
+        pass
 logger.info("Processor loaded.")
 # ---------------------------
 # Helpers
 # ---------------------------
+def _messages(image: Image.Image, question: str):
+    return [
         {
             "role": "user",
             "content": [
         }
     ]
+def build_inputs(image: Image.Image, question: str, *, disable_splitting: bool = False):
+    """
+    Build Qwen-style multimodal chat inputs.
+    When disable_splitting is True, we hint the image processor to avoid tiling,
+    which can fix token/feature mismatches for some edge cases.
+    """
+    messages = _messages(image, question)
+    # Apply chat template (inserts the model's image placeholder tokens automatically)
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    # Prepare vision inputs
     image_inputs, video_inputs = process_vision_info(messages)
+    # Optionally force-disable splitting (fallback path)
+    if disable_splitting and hasattr(processor, "image_processor"):
+        ip = processor.image_processor
+        # Remember the previous setting so the shared module-level processor is restored afterwards
+        prev = getattr(ip, "do_image_splitting", None)
+        try:
+            if hasattr(ip, "do_image_splitting"):
+                ip.do_image_splitting = False
+            inputs = processor(
+                text=[text],
+                images=image_inputs,
+                videos=video_inputs,
+                return_tensors="pt",   # <- no padding for single-sample path
+            )
+        finally:
+            if prev is not None:
+                ip.do_image_splitting = prev
+    else:
+        inputs = processor(
+            text=[text],
+            images=image_inputs,
+            videos=video_inputs,
+            return_tensors="pt",       # <- no padding to avoid mask quirks
+        )
     return inputs
 def format_derm_disclaimer(ans: str) -> str:
     """
     Runs inside a ZeroGPU reservation window.
     Loads model on GPU, generates, frees VRAM.
+    Includes a fallback retry if Qwen raises a token/feature mismatch.
     """
     if image is None:
         return "❌ Please upload an image first."
     try:
         logger.info(f"Loading model on GPU: {MODEL_ID}")
         model = Qwen2VLForConditionalGeneration.from_pretrained(
             device_map="cuda",
             trust_remote_code=True,
             low_cpu_mem_usage=True,
+            ignore_mismatched_sizes=True,  # keep until weights align perfectly
         )
         logger.info("Model loaded successfully!")
+        def _run_infer(disable_splitting: bool = False) -> str:
+            inputs = build_inputs(image, question, disable_splitting=disable_splitting)
+            # Move tensors to CUDA
+            inputs = {k: v.to("cuda") if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
+            with torch.no_grad():
+                out_ids = model.generate(
+                    **inputs,
+                    **GEN_KW,
+                    pad_token_id=processor.tokenizer.eos_token_id,
+                )
+            # Strip prompt tokens before decoding
+            trimmed = [o[len(i):] for i, o in zip(inputs["input_ids"], out_ids)]
+            text = processor.batch_decode(
+                trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+            )[0]
+            return text
+        # First attempt: normal path
+        try:
+            text = _run_infer(disable_splitting=False)
+        except ValueError as ve:
+            msg = str(ve)
+            # Known Qwen2-VL edge case: token/feature mismatch — retry with splitting disabled
+            if "Image features and image tokens do not match" in msg:
+                logger.warning("Token/feature mismatch detected — retrying with image splitting disabled.")
+                text = _run_infer(disable_splitting=True)
+            else:
+                raise
+        # Free VRAM
         del model
         torch.cuda.empty_cache()
         submit_btn.click(fn=analyze_skin_condition, inputs=[image_input, question_input], outputs=output_box, queue=True)
         clear_btn.click(fn=lambda: (None, ""), inputs=None, outputs=[image_input, question_input])
+        # Gradio 4.44.1: simple queue() call (no kwargs)
         demo.queue()
         gr.Markdown("Tips: Ensure good lighting and focus. Avoid uploading personally identifying information.")
         show_error=True,
         inbrowser=False,
         quiet=False,
+        ssr_mode=False,  # avoid Node 20 requirement in container
     )
 if __name__ == "__main__":