Spaces:

ColdSlim
/

Dermatology-AI-Assistant

Sleeping

App Files Files Community

ColdSlim committed on Oct 13, 2025

Commit

25a237f

verified ·

1 Parent(s): fc0a615

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -46

app.py CHANGED Viewed

@@ -1,15 +1,9 @@
 # app.py
 # Dermatology-AI-Assistant — Hugging Face Space (ZeroGPU-ready)
-# - Logging is configured before use
-# - No runtime pip installs (use requirements.txt)
-# - ZeroGPU acquired only during inference via @spaces.GPU
-# - Uses qwen-vl-utils.process_vision_info (fixes missing attribute error)
-# - SSR disabled in Gradio launch to avoid Node 20 requirement in container
 import os
-import sys
 import logging
-from typing import Optional, Tuple
 import gradio as gr
 import spaces
@@ -18,21 +12,17 @@ from PIL import Image
 from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
 from qwen_vl_utils import process_vision_info
 # ---------------------------
 # Logging
 # ---------------------------
 logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(name)s:%(message)s")
 logger = logging.getLogger(__name__)
 # ---------------------------
 # Config
 # ---------------------------
-# Fine-tuned (or partially fine-tuned) Qwen VL checkpoint
 MODEL_ID = os.environ.get("MODEL_ID", "ColdSlim/Dermatology-Qwen2.5-VL-3B")
-# Generation params (tweak as needed)
 GEN_KW = dict(
     max_new_tokens=512,
     do_sample=True,
@@ -40,15 +30,12 @@ GEN_KW = dict(
     top_p=0.9,
 )
-# ZeroGPU time (seconds). Increase if your model is slow to generate.
 ZGPU_DURATION = int(os.environ.get("ZGPU_DURATION", "180"))
-# Preload only the processor on CPU; load the model inside GPU-decorated call.
 logger.info(f"Loading processor from: {MODEL_ID}")
 processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
 logger.info("Processor loaded.")
 # ---------------------------
 # Helpers
 # ---------------------------
@@ -67,13 +54,9 @@ def build_inputs(image: Image.Image, question: str):
         }
     ]
-    # Chat template
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    # Vision inputs
     image_inputs, video_inputs = process_vision_info(messages)
-    # Pack tensors (CPU for now; we move to CUDA later)
     inputs = processor(
         text=[text],
         images=image_inputs,
@@ -83,9 +66,7 @@ def build_inputs(image: Image.Image, question: str):
     )
     return inputs
 def format_derm_disclaimer(ans: str) -> str:
-    """Append a short medical disclaimer (non-blocking)."""
     tail = (
         "\n\n---\n"
         "_Disclaimer: This AI is not a medical device. The output is informational and may be inaccurate. "
@@ -93,37 +74,32 @@ def format_derm_disclaimer(ans: str) -> str:
     )
     return ans + tail
 # ---------------------------
 # Inference (ZeroGPU)
 # ---------------------------
 @spaces.GPU(duration=ZGPU_DURATION)
 def analyze_skin_condition(image: Optional[Image.Image], question: str) -> str:
     """
-    Main inference function. Runs inside a ZeroGPU reservation window.
     Loads model on GPU, generates, frees VRAM.
     """
     if image is None:
         return "❌ Please upload an image first."
     try:
         logger.info(f"Loading model on GPU: {MODEL_ID}")
-        # On ZeroGPU, load inside the GPU-decorated function
         model = Qwen2VLForConditionalGeneration.from_pretrained(
             MODEL_ID,
-            torch_dtype=torch.float16,          # fp16 is broadly compatible on ZeroGPU
-            device_map="cuda",                  # place modules on available CUDA
             trust_remote_code=True,
             low_cpu_mem_usage=True,
-            ignore_mismatched_sizes=True,       # your logs indicated shape diffs; keep this to avoid crash
         )
         logger.info("Model loaded successfully!")
-        # Build and move inputs to CUDA
         inputs = build_inputs(image, question)
         inputs = {k: v.to("cuda") if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
-        # Generate
         with torch.no_grad():
             out_ids = model.generate(
                 **inputs,
@@ -131,15 +107,12 @@ def analyze_skin_condition(image: Optional[Image.Image], question: str) -> str:
                 pad_token_id=processor.tokenizer.eos_token_id,
             )
-        # Strip prompt tokens before decoding for clean answer
-        prompt_len_trimmed = [
-            out[len(inp):] for inp, out in zip(inputs["input_ids"], out_ids)
-        ]
         text = processor.batch_decode(
-            prompt_len_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
         )[0]
-        # Free VRAM early
         del model
         torch.cuda.empty_cache()
@@ -149,7 +122,6 @@ def analyze_skin_condition(image: Optional[Image.Image], question: str) -> str:
         logger.exception("Error during inference")
         return f"❌ Error analyzing image: {e}"
 # ---------------------------
 # UI
 # ---------------------------
@@ -174,20 +146,15 @@ def create_interface() -> gr.Blocks:
         output_box = gr.Textbox(label="Response", lines=16)
-        # Wire events
         submit_btn.click(fn=analyze_skin_condition, inputs=[image_input, question_input], outputs=output_box, queue=True)
         clear_btn.click(fn=lambda: (None, ""), inputs=None, outputs=[image_input, question_input])
-        # Queue for concurrency control (ZeroGPU friendly)
-        demo.queue(concurrency_count=1, status_update_rate=1)
-        gr.Markdown(
-            "Tips: Ensure good lighting and focus. Avoid uploading personally identifying information."
-        )
     return demo
 def main():
     demo = create_interface()
     demo.launch(
@@ -197,9 +164,8 @@ def main():
         show_error=True,
         inbrowser=False,
         quiet=False,
-        ssr_mode=False,  # disable SSR to avoid Node 20 requirement in Spaces container
     )
 if __name__ == "__main__":
     main()

 # app.py
 # Dermatology-AI-Assistant — Hugging Face Space (ZeroGPU-ready)
 import os
 import logging
+from typing import Optional
 import gradio as gr
 import spaces
 from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
 from qwen_vl_utils import process_vision_info
 # ---------------------------
 # Logging
 # ---------------------------
 logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(name)s:%(message)s")
 logger = logging.getLogger(__name__)
 # ---------------------------
 # Config
 # ---------------------------
 MODEL_ID = os.environ.get("MODEL_ID", "ColdSlim/Dermatology-Qwen2.5-VL-3B")
 GEN_KW = dict(
     max_new_tokens=512,
     do_sample=True,
     top_p=0.9,
 )
 ZGPU_DURATION = int(os.environ.get("ZGPU_DURATION", "180"))
 logger.info(f"Loading processor from: {MODEL_ID}")
 processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
 logger.info("Processor loaded.")
 # ---------------------------
 # Helpers
 # ---------------------------
         }
     ]
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs, video_inputs = process_vision_info(messages)
     inputs = processor(
         text=[text],
         images=image_inputs,
     )
     return inputs
 def format_derm_disclaimer(ans: str) -> str:
     tail = (
         "\n\n---\n"
         "_Disclaimer: This AI is not a medical device. The output is informational and may be inaccurate. "
     )
     return ans + tail
 # ---------------------------
 # Inference (ZeroGPU)
 # ---------------------------
 @spaces.GPU(duration=ZGPU_DURATION)
 def analyze_skin_condition(image: Optional[Image.Image], question: str) -> str:
     """
+    Runs inside a ZeroGPU reservation window.
     Loads model on GPU, generates, frees VRAM.
     """
     if image is None:
         return "❌ Please upload an image first."
     try:
         logger.info(f"Loading model on GPU: {MODEL_ID}")
         model = Qwen2VLForConditionalGeneration.from_pretrained(
             MODEL_ID,
+            torch_dtype=torch.float16,
+            device_map="cuda",
             trust_remote_code=True,
             low_cpu_mem_usage=True,
+            ignore_mismatched_sizes=True,  # keep until your weights match exactly
         )
         logger.info("Model loaded successfully!")
         inputs = build_inputs(image, question)
         inputs = {k: v.to("cuda") if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
         with torch.no_grad():
             out_ids = model.generate(
                 **inputs,
                 pad_token_id=processor.tokenizer.eos_token_id,
             )
+        # strip prompt tokens before decoding
+        trimmed = [o[len(i):] for i, o in zip(inputs["input_ids"], out_ids)]
         text = processor.batch_decode(
+            trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
         )[0]
         del model
         torch.cuda.empty_cache()
         logger.exception("Error during inference")
         return f"❌ Error analyzing image: {e}"
 # ---------------------------
 # UI
 # ---------------------------
         output_box = gr.Textbox(label="Response", lines=16)
         submit_btn.click(fn=analyze_skin_condition, inputs=[image_input, question_input], outputs=output_box, queue=True)
         clear_btn.click(fn=lambda: (None, ""), inputs=None, outputs=[image_input, question_input])
+        # Gradio 4.44.1: call queue() with no keyword args
+        demo.queue()
+        gr.Markdown("Tips: Ensure good lighting and focus. Avoid uploading personally identifying information.")
     return demo
 def main():
     demo = create_interface()
     demo.launch(
         show_error=True,
         inbrowser=False,
         quiet=False,
+        ssr_mode=False,  # disable SSR to avoid Node 20 requirement in container
     )
 if __name__ == "__main__":
     main()