Spaces:

ColdSlim
/

Dermatology-AI-Assistant

Sleeping

App Files Files Community

Manik Sheokand committed on Oct 13, 2025

Commit

421600f

1 Parent(s): bdbb866

Fix: Replace Qwen2VLForConditionalGeneration with AutoModelForCausalLM and update transformers to 4.44.0

Browse files

Files changed (3) hide show

app.py +233 -196
requirements.txt +3 -3
runtime.txt +1 -0

app.py CHANGED Viewed

@@ -1,220 +1,257 @@
-# app.py
-# Dermatology-AI-Assistant — Hugging Face Space (ZeroGPU-ready)
-# - First tries your fine-tuned model
-# - If Qwen raises token/feature mismatch, falls back to official base model
-# - Acquires ZeroGPU only during inference
-# - Uses qwen-vl-utils.process_vision_info
-import os
-import logging
-from typing import Optional
-import gradio as gr
 import spaces
 import torch
 from PIL import Image
-from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
-from qwen_vl_utils import process_vision_info
-logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(name)s:%(message)s")
-logger = logging.getLogger(__name__)
-# ---------------------------
-# Config
-# ---------------------------
-FT_MODEL_ID = os.environ.get("MODEL_ID", "ColdSlim/Dermatology-Qwen2.5-VL-3B")
-BASE_MODEL_ID = os.environ.get("FALLBACK_BASE_MODEL_ID", "Qwen/Qwen2.5-VL-3B-Instruct")
-GEN_KW = dict(
-    max_new_tokens=512,
-    do_sample=True,
-    temperature=0.7,
-    top_p=0.9,
-)
-ZGPU_DURATION = int(os.environ.get("ZGPU_DURATION", "180"))
-# Preload only the fine-tuned processor on CPU; we may swap to base processor in the fallback
-logger.info(f"Loading processor from: {FT_MODEL_ID}")
-ft_processor = AutoProcessor.from_pretrained(FT_MODEL_ID, trust_remote_code=True)
-logger.info("Processor loaded.")
-# Optional: stabilize tiling by constraining pixel range (helps placeholder consistency)
-def _tune_image_processor(proc):
-    if hasattr(proc, "image_processor"):
-        try:
-            proc.image_processor.max_pixels = int(os.environ.get("QWEN_MAX_PIXELS", "1500000"))  # ~1.5MP
-            proc.image_processor.min_pixels = int(os.environ.get("QWEN_MIN_PIXELS", "262144"))   # 512x512
-        except Exception:
-            pass
-_tune_image_processor(ft_processor)
-# ---------------------------
-# Helpers
-# ---------------------------
-def _messages(image: Image.Image, question: str):
-    # ensure RGB to avoid mode surprises
-    if image.mode != "RGB":
-        image = image.convert("RGB")
-    return [
-        {
-            "role": "user",
-            "content": [
-                {"type": "image", "image": image},
-                {"type": "text", "text": question},
-            ],
-        }
-    ]
-def build_inputs(processor: AutoProcessor, image: Image.Image, question: str):
-    """
-    Build Qwen-style multimodal inputs (no padding, batch size 1).
-    """
-    messages = _messages(image, question)
-    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    image_inputs, video_inputs = process_vision_info(messages)
-    inputs = processor(
-        text=[text],
-        images=image_inputs,
-        videos=video_inputs,
-        return_tensors="pt",  # no padding for single sample
-    )
-    return inputs
-def _pad_token_id(processor, model):
-    # Prefer tokenizer.eos if present; else model config; else 0
-    tid = getattr(getattr(processor, "tokenizer", None), "eos_token_id", None)
-    if tid is not None:
-        return tid
-    return getattr(getattr(model, "config", None), "eos_token_id", 0)
-def format_derm_disclaimer(ans: str) -> str:
-    tail = (
-        "\n\n---\n"
-        "_Disclaimer: This AI is not a medical device. The output is informational and may be inaccurate. "
-        "Consult a qualified dermatologist for diagnosis and treatment._"
-    )
-    return ans + tail
-def _generate_text(model, processor, inputs: dict) -> str:
-    # move to CUDA
-    inputs = {k: v.to("cuda") if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
-    with torch.no_grad():
-        out_ids = model.generate(
-            **inputs,
-            **GEN_KW,
-            pad_token_id=_pad_token_id(processor, model),
-        )
-    trimmed = [o[len(i):] for i, o in zip(inputs["input_ids"], out_ids)]
-    text = processor.batch_decode(trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
-    return text
-# ---------------------------
-# Inference (ZeroGPU)
-# ---------------------------
-@spaces.GPU(duration=ZGPU_DURATION)
-def analyze_skin_condition(image: Optional[Image.Image], question: str) -> str:
-    """
-    Try fine-tuned model first; on token/feature mismatch, fall back to base model+processor.
-    """
-    if image is None:
-        return "❌ Please upload an image first."
-    model = None
     try:
-        # ------- Attempt 1: Fine-tuned model -------
-        logger.info(f"Loading fine-tuned model on GPU: {FT_MODEL_ID}")
-        model = Qwen2VLForConditionalGeneration.from_pretrained(
-            FT_MODEL_ID,
-            torch_dtype=torch.float16,
-            device_map="cuda",
             trust_remote_code=True,
             low_cpu_mem_usage=True,
-            ignore_mismatched_sizes=True,  # your FT ckpt logs suggest some vision head diffs
         )
-        logger.info("Fine-tuned model loaded.")
-        inputs = build_inputs(ft_processor, image, question)
-        try:
-            text = _generate_text(model, ft_processor, inputs)
-            return format_derm_disclaimer(text)
-        except ValueError as ve:
-            msg = str(ve)
-            if "Image features and image tokens do not match" in msg:
-                logger.warning("Token/feature mismatch on fine-tuned model — falling back to base model.")
-            else:
-                raise
-        # ------- Attempt 2: Base model & its processor -------
-        # Free FT model first
-        del model
-        torch.cuda.empty_cache()
-        logger.info(f"Loading BASE model on GPU: {BASE_MODEL_ID}")
-        base_processor = AutoProcessor.from_pretrained(BASE_MODEL_ID, trust_remote_code=True)
-        _tune_image_processor(base_processor)
-        model = Qwen2VLForConditionalGeneration.from_pretrained(
-            BASE_MODEL_ID,
-            torch_dtype=torch.float16,
-            device_map="cuda",
-            trust_remote_code=True,
-            low_cpu_mem_usage=True,
-        )
-        logger.info("Base model loaded.")
-        base_inputs = build_inputs(base_processor, image, question)
-        text = _generate_text(model, base_processor, base_inputs)
-        return format_derm_disclaimer(text)
     except Exception as e:
-        logger.exception("Error during inference")
-        return f"❌ Error analyzing image: {e}"
-    finally:
-        if model is not None:
-            del model
-        torch.cuda.empty_cache()
-# ---------------------------
-# UI
-# ---------------------------
-def create_interface() -> gr.Blocks:
-    with gr.Blocks(title="Dermatology AI Assistant") as demo:
-        gr.Markdown(
-            "# Dermatology AI Assistant\n"
-            "Upload a skin photo and ask a question. The model will provide an informational response."
         )
-        with gr.Row():
-            image_input = gr.Image(type="pil", label="Upload Image (JPG/PNG)")
-            question_input = gr.Textbox(
-                label="Question / Prompt",
-                value="Describe this skin condition in detail and suggest possible next steps.",
-                lines=3,
             )
         with gr.Row():
-            submit_btn = gr.Button("Analyze", variant="primary")
-            clear_btn = gr.Button("Clear")
-        output_box = gr.Textbox(label="Response", lines=16)
-        submit_btn.click(fn=analyze_skin_condition, inputs=[image_input, question_input], outputs=output_box, queue=True)
-        clear_btn.click(fn=lambda: (None, ""), inputs=None, outputs=[image_input, question_input])
-        demo.queue()
-        gr.Markdown("Tips: Ensure good lighting and focus. Avoid uploading personally identifying information.")
     return demo
 def main():
-    demo = create_interface()
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        share=False,
-        show_error=True,
-        inbrowser=False,
-        quiet=False,
-        ssr_mode=False,
-    )
 if __name__ == "__main__":
     main()

 import spaces
+import gradio as gr
 import torch
+from transformers import AutoProcessor, AutoModelForCausalLM
 from PIL import Image
+import logging
+import subprocess
+import sys
+# Force Gradio update if needed
+def ensure_gradio_version():
+    try:
+        import pkg_resources
+        current_version = pkg_resources.get_distribution("gradio").version
+        if current_version.startswith("4.0"):
+            logger.warning(f"Detected old Gradio version {current_version}, attempting to upgrade...")
+            subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "gradio==4.44.1"])
+            logger.info("Gradio upgrade completed")
+    except Exception as e:
+        logger.warning(f"Could not check/upgrade Gradio: {e}")
+# Check and upgrade Gradio if needed
+ensure_gradio_version()
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Global variables for model and processor
+model = None
+processor = None
+def load_model():
+    """Load the processor and model into the module-level globals.
+
+    Assigns the global ``processor`` and ``model`` from the hard-coded
+    Hugging Face repository id below.
+
+    Returns:
+        True when both objects were loaded, False when any step raised
+        (the error is logged, not re-raised).
+    """
+    global model, processor
     try:
+        # Load the merged model (replace with your actual model path)
+        model_name = "ColdSlim/Dermatology-Qwen2.5-VL-3B"  # Update with your actual model name
+        logger.info(f"Loading model: {model_name}")
+        processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
+        # NOTE(review): the repository name suggests a Qwen2.5-VL
+        # vision-language checkpoint; confirm that AutoModelForCausalLM can
+        # resolve its config in the installed transformers version.
+        # NOTE(review): the keyword is spelled `dtype` here; the previous
+        # revision of this file used `torch_dtype` - confirm which spelling
+        # the installed transformers version honours.
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            dtype=torch.bfloat16,
+            device_map="auto",
             trust_remote_code=True,
             low_cpu_mem_usage=True,
+            # NOTE(review): presumably lets loading continue when checkpoint
+            # tensor shapes differ from the model definition - confirm that
+            # this cannot hide an incompatible checkpoint.
+            ignore_mismatched_sizes=True
         )
+        logger.info("Model loaded successfully!")
+        return True
     except Exception as e:
+        # Failure is reported through the return value so the caller can
+        # decide what to show.
+        logger.error(f"Error loading model: {e}")
+        return False
+def analyze_skin_condition(image, question="Describe this skin condition in detail."):
+    """Analyze skin condition from uploaded image"""
+    global model, processor
+    if model is None or processor is None:
+        return "❌ Model not loaded. Please wait for the model to load or contact the administrator."
+    if image is None:
+        return "❌ Please upload an image first."
+    try:
+        # Prepare the conversation
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image", "image": image},
+                    {"type": "text", "text": question}
+                ]
+            }
+        ]
+        # Process the input
+        text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+        image_inputs, video_inputs = processor.process_vision_info(messages)
+        inputs = processor(
+            text=[text],
+            images=image_inputs,
+            videos=video_inputs,
+            padding=True,
+            return_tensors="pt"
         )
+        # Move inputs to the same device as model
+        inputs = {k: v.to(model.device) if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
+        # Generate response
+        with torch.no_grad():
+            generated_ids = model.generate(
+                **inputs,
+                max_new_tokens=512,
+                do_sample=True,
+                temperature=0.7,
+                top_p=0.9,
+                pad_token_id=processor.tokenizer.eos_token_id
             )
+        # Decode the response
+        generated_ids_trimmed = [
+            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+        ]
+        output_text = processor.batch_decode(
+            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+        )[0]
+        return output_text
+    except Exception as e:
+        logger.error(f"Error during inference: {e}")
+        return f"❌ Error analyzing image: {str(e)}"
+def create_interface():
+    """Create the Gradio interface"""
+    # Load model on startup
+    model_loaded = load_model()
+    with gr.Blocks(
+        title="Dermatology AI Assistant",
+        theme=gr.themes.Soft(),
+        css="""
+        .gradio-container {
+            max-width: 1200px !important;
+            margin: auto !important;
+        }
+        .main-header {
+            text-align: center;
+            margin-bottom: 2rem;
+        }
+        .warning-box {
+            background-color: #fff3cd;
+            border: 1px solid #ffeaa7;
+            border-radius: 8px;
+            padding: 1rem;
+            margin: 1rem 0;
+        }
+        """
+    ) as demo:
+        gr.HTML("""
+        <div class="main-header">
+            <h1>🩺 Dermatology AI Assistant</h1>
+            <p>Powered by Qwen2.5-VL-3B fine-tuned for dermatology analysis</p>
+        </div>
+        """)
+        # Warning message
+        gr.HTML("""
+        <div class="warning-box">
+            <h3>⚠️ Medical Disclaimer</h3>
+            <p>This AI assistant is for educational and research purposes only.
+            It should not be used as a substitute for professional medical advice,
+            diagnosis, or treatment. Always consult with a qualified healthcare
+            provider for medical concerns.</p>
+        </div>
+        """)
         with gr.Row():
+            with gr.Column(scale=1):
+                # Image upload
+                image_input = gr.Image(
+                    label="Upload Skin Image",
+                    type="pil",
+                    height=400
+                )
+                # Question input
+                question_input = gr.Textbox(
+                    label="Question (Optional)",
+                    placeholder="Describe this skin condition in detail.",
+                    value="Describe this skin condition in detail.",
+                    lines=3
+                )
+                # Analyze button
+                analyze_btn = gr.Button(
+                    "🔍 Analyze Skin Condition",
+                    variant="primary",
+                    size="lg"
+                )
+                # Example questions
+                gr.HTML("""
+                <h4>💡 Example Questions:</h4>
+                <ul>
+                    <li>What type of skin condition is this?</li>
+                    <li>Describe the characteristics of this lesion.</li>
+                    <li>What are the potential causes of this skin issue?</li>
+                    <li>What should I know about this skin condition?</li>
+                </ul>
+                """)
+            with gr.Column(scale=1):
+                # Output
+                output_text = gr.Textbox(
+                    label="AI Analysis",
+                    lines=15,
+                    max_lines=20,
+                    show_copy_button=True
+                )
+        # Examples
+        gr.Examples(
+            examples=[
+                ["What type of skin condition is this?", "Describe this skin condition in detail."],
+                ["What are the characteristics of this lesion?", "Describe this skin condition in detail."],
+                ["What should I know about this skin issue?", "Describe this skin condition in detail."],
+            ],
+            inputs=[question_input, question_input],
+            label="💡 Example Questions"
+        )
+        # Event handlers
+        analyze_btn.click(
+            fn=analyze_skin_condition,
+            inputs=[image_input, question_input],
+            outputs=output_text
+        )
+        # Model status
+        if model_loaded:
+            gr.HTML("<div style='text-align: center; color: green;'>✅ Model loaded successfully!</div>")
+        else:
+            gr.HTML("<div style='text-align: center; color: red;'>❌ Model loading failed. Please check the logs.</div>")
     return demo
+@spaces.GPU
 def main():
+    """Build the interface and serve it on 0.0.0.0:7860.
+
+    Launch errors are logged and then re-raised so the process exits with
+    the original exception.
+    """
+    # NOTE(review): @spaces.GPU wraps this launcher, which does not return
+    # while the server is running. The previous revision of this file
+    # decorated analyze_skin_condition instead - confirm that decorating the
+    # launcher is intended.
+    try:
+        # Create and launch the interface
+        demo = create_interface()
+        demo.launch(
+            server_name="0.0.0.0",
+            server_port=7860,
+            share=False,
+            show_error=True,
+            inbrowser=False,
+            quiet=False
+        )
+    except Exception as e:
+        logger.error(f"Error launching app: {e}")
+        raise
 if __name__ == "__main__":
     main()

requirements.txt CHANGED Viewed

@@ -3,8 +3,8 @@
 # Core dependencies
 torch>=2.0.0
 torchvision>=0.15.0
-transformers==4.44.2
-accelerate>=0.34.2
 gradio==4.44.1
 huggingface_hub>=0.20.0
 spaces
@@ -14,7 +14,7 @@ Pillow>=9.0.0
 opencv-python>=4.5.0
 # Qwen2-VL specific
-qwen-vl-utils>=0.0.8
 # Optional: For better performance
 flash-attn>=2.0.0

 # Core dependencies
 torch>=2.0.0
 torchvision>=0.15.0
+transformers>=4.44.0
+accelerate>=0.20.0
 gradio==4.44.1
 huggingface_hub>=0.20.0
 spaces
 opencv-python>=4.5.0
 # Qwen2-VL specific
+qwen-vl-utils>=0.0.1
 # Optional: For better performance
 flash-attn>=2.0.0

runtime.txt CHANGED Viewed

	@@ -1 +1,2 @@
1	python-3.10


1	python-3.10
2	+