Manik Sheokand committed on
Commit
cbc528c
·
1 Parent(s): d5cfeb4

new changes

Browse files
Files changed (3) hide show
  1. .env +4 -0
  2. app.py +3 -3
  3. app.py.backup +324 -0
.env ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Optimized settings for free tier
2
+ ZGPU_DURATION=120
3
+ QWEN_MAX_PIXELS=250880
4
+ QWEN_MIN_PIXELS=200704
app.py CHANGED
@@ -38,11 +38,11 @@ BASE_MODEL_ID = os.environ.get("BASE_MODEL_ID", "Qwen/Qwen2.5-VL-3B-Instruct")
38
  ADAPTER_ID = os.environ.get("ADAPTER_ID", "ColdSlim/Dermatology-Qwen2.5-VL-3B-LoRA")
39
 
40
  # Give ourselves more time for first load in cold starts
41
- ZGPU_DURATION = int(os.environ.get("ZGPU_DURATION", "120")) # seconds
42
 
43
  # Deterministic decoding for eval; tweak as needed
44
  GEN_KW = dict(
45
- max_new_tokens=128,
46
  do_sample=False,
47
  temperature=0.0,
48
  top_p=1.0,
@@ -77,7 +77,7 @@ def _load_multimodal_processor() -> AutoProcessor:
77
  )
78
  # optional: stabilize pixel hints
79
  try:
80
- proc.image_processor.max_pixels = int(os.environ.get("QWEN_MAX_PIXELS", str(320 * 28 * 28))) # ~0.5MP
81
  proc.image_processor.min_pixels = int(os.environ.get("QWEN_MIN_PIXELS", str(256 * 28 * 28)))
82
  except Exception:
83
  pass
 
38
  ADAPTER_ID = os.environ.get("ADAPTER_ID", "ColdSlim/Dermatology-Qwen2.5-VL-3B-LoRA")
39
 
40
  # Give ourselves more time for first load in cold starts
41
+ ZGPU_DURATION = int(os.environ.get("ZGPU_DURATION", "60")) # seconds
42
 
43
  # Deterministic decoding for eval; tweak as needed
44
  GEN_KW = dict(
45
+ max_new_tokens=64,
46
  do_sample=False,
47
  temperature=0.0,
48
  top_p=1.0,
 
77
  )
78
  # optional: stabilize pixel hints
79
  try:
80
+ proc.image_processor.max_pixels = int(os.environ.get("QWEN_MAX_PIXELS", str(256 * 28 * 28))) # ~0.2MP
81
  proc.image_processor.min_pixels = int(os.environ.get("QWEN_MIN_PIXELS", str(256 * 28 * 28)))
82
  except Exception:
83
  pass
app.py.backup ADDED
@@ -0,0 +1,324 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ # Dermatology-AI-Assistant — HF Spaces (ZeroGPU, Qwen2.5-VL + LoRA adapters)
3
+ # - Normal UI for single-image analysis
4
+ # - Hidden API endpoint /analyze_batch for batched evaluation
5
+ # - Caches & sanitizes LoRA repo once at startup (CPU); attaches on GPU per request
6
+ # - No CUDA at import-time; ZeroGPU only inside @spaces.GPU functions
7
+
8
+ import os
9
+ import json
10
+ import tempfile
11
+ import shutil
12
+ import logging
13
+ from typing import Optional, List, Dict, Any
14
+
15
+ import gradio as gr
16
+ import spaces
17
+ import torch
18
+ from PIL import Image
19
+ from huggingface_hub import snapshot_download
20
+ from peft import PeftModel
21
+ from transformers import AutoProcessor
22
+
23
+ # Prefer the new class name if your transformers is recent; fall back to old alias.
24
+ try:
25
+ from transformers import AutoModelForImageTextToText as VisionTextModelClass
26
+ except Exception:
27
+ from transformers import AutoModelForVision2Seq as VisionTextModelClass # deprecated alias
28
+
29
+ from qwen_vl_utils import process_vision_info
30
+
31
+ logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(name)s:%(message)s")
32
+ logger = logging.getLogger(__name__)
33
+
34
# ---------------------------
# Config
# ---------------------------
# Model identity: base checkpoint plus the dermatology LoRA adapter repo.
BASE_MODEL_ID = os.environ.get("BASE_MODEL_ID", "Qwen/Qwen2.5-VL-3B-Instruct")
ADAPTER_ID = os.environ.get("ADAPTER_ID", "ColdSlim/Dermatology-Qwen2.5-VL-3B-LoRA")

# ZeroGPU lease length; generous default so cold-start model loads fit.
ZGPU_DURATION = int(os.environ.get("ZGPU_DURATION", "600"))  # seconds

# Deterministic decoding for eval; tweak as needed.
GEN_KW = {
    "max_new_tokens": 256,
    "do_sample": False,
    "temperature": 0.0,
    "top_p": 1.0,
    "repetition_penalty": 1.02,
}

# System prompt: refuse non-skin images outright, otherwise answer with a
# hedged description, differentials, and next steps.
SYSTEM_PROMPT = (
    "You are a dermatology assistant. First, look carefully at the IMAGE.\n"
    "If the image is NOT a close-up of human skin or a dermatologic lesion, "
    "respond EXACTLY with: 'The image does not appear to show a skin condition; I cannot analyze it.' "
    "Do not invent findings.\n"
    "If it IS a skin/lesion photo, provide a concise description, likely differentials (3–5), "
    "and prudent next steps. Avoid definitive diagnoses and include red flags briefly."
)
60
+
61
# ---------------------------
# Processor (CPU only; safe at import time)
# ---------------------------
def _load_multimodal_processor() -> AutoProcessor:
    """Load the Qwen2.5-VL processor on CPU and verify it is multimodal.

    Raises:
        RuntimeError: when the loaded processor cannot accept images, which
            usually means the installed transformers stack is too old.
    """
    logger.info(f"Loading multimodal processor from base: {BASE_MODEL_ID}")
    mm_proc = AutoProcessor.from_pretrained(
        BASE_MODEL_ID,
        trust_remote_code=True,
        use_fast=False,  # slow processor keeps the multimodal __call__(images=...) path
    )
    # Sanity check: the processor must expose an image pipeline.
    call_sig = getattr(mm_proc.__call__, "__signature__", None)
    if call_sig is not None:
        takes_images = "images" in str(call_sig)
    else:
        takes_images = hasattr(mm_proc, "image_processor")
    if not takes_images or not hasattr(mm_proc, "image_processor"):
        raise RuntimeError(
            "Loaded processor is not multimodal. Ensure transformers>=4.44.2, qwen-vl-utils>=0.0.8, torch>=2.2."
        )
    # Optionally pin the vision token budget so image resizing stays stable.
    try:
        mm_proc.image_processor.max_pixels = int(os.environ.get("QWEN_MAX_PIXELS", str(640 * 28 * 28)))  # ~0.5MP
        mm_proc.image_processor.min_pixels = int(os.environ.get("QWEN_MIN_PIXELS", str(256 * 28 * 28)))
    except Exception:
        pass  # best-effort; some processor versions may lack these attributes
    logger.info(f"Processor ready: {mm_proc.__class__.__name__}")
    return mm_proc
86
+
87
# Built once at import time on CPU so every request handler can reuse it.
processor = _load_multimodal_processor()
88
+
89
+ # ---------------------------
90
+ # LoRA adapter cache & sanitize (CPU-only, startup)
91
+ # ---------------------------
92
+ def _sanitize_adapter_repo(src_dir: str) -> str:
93
+ """Remove unknown keys from adapter_config.json so PEFT can parse."""
94
+ cfg_path = os.path.join(src_dir, "adapter_config.json")
95
+ if not os.path.isfile(cfg_path):
96
+ return src_dir
97
+
98
+ with open(cfg_path, "r") as f:
99
+ cfg = json.load(f)
100
+
101
+ allowed = {
102
+ "peft_type", "task_type",
103
+ "r", "lora_alpha", "lora_dropout",
104
+ "target_modules", "bias",
105
+ "inference_mode",
106
+ "base_model_name_or_path",
107
+ "fan_in_fan_out",
108
+ "modules_to_save",
109
+ "layers_to_transform",
110
+ "layers_pattern",
111
+ "use_rslora",
112
+ "rank_dropout", "module_dropout",
113
+ "init_lora_weights",
114
+ "use_dora",
115
+ }
116
+
117
+ # If DoRA isn't actually used, remove its block
118
+ if str(cfg.get("use_dora", "false")).lower() in ("false", "0", "no"):
119
+ cfg.pop("dora_config", None)
120
+
121
+ # Drop unknown top-level keys (e.g., 'corda_config', 'eva_config', etc.)
122
+ for k in list(cfg.keys()):
123
+ if k not in allowed:
124
+ cfg.pop(k, None)
125
+
126
+ cfg.setdefault("peft_type", "LORA")
127
+ cfg.setdefault("task_type", "CAUSAL_LM")
128
+ cfg.setdefault("bias", "none")
129
+ cfg.setdefault("inference_mode", True)
130
+
131
+ # Normalize booleans if strings
132
+ for k in ("inference_mode", "use_rslora", "use_dora", "fan_in_fan_out"):
133
+ if k in cfg and isinstance(cfg[k], str):
134
+ cfg[k] = cfg[k].lower() in ("true", "1", "yes")
135
+
136
+ with open(cfg_path, "w") as f:
137
+ json.dump(cfg, f, indent=2)
138
+ return src_dir
139
+
140
# Fetch the adapter snapshot into the local HF cache once at import time
# (CPU-only), then scrub its adapter_config.json so PEFT can load it later.
# NOTE(review): `local_dir_use_symlinks` is deprecated in recent
# huggingface_hub releases and ignored when local_dir is None — confirm
# installed version before removing.
logger.info(f"Downloading/caching LoRA adapters: {ADAPTER_ID}")
_ADAPTER_LOCAL = snapshot_download(ADAPTER_ID, local_dir=None, local_dir_use_symlinks=False)
_ADAPTER_LOCAL = _sanitize_adapter_repo(_ADAPTER_LOCAL)
logger.info(f"Adapters ready at: {_ADAPTER_LOCAL}")
144
+
145
# ---------------------------
# Helpers
# ---------------------------
def _messages(image: Image.Image, question: str):
    """Build the two-turn chat message list for one (image, question) pair."""
    rgb_image = image.convert("RGB") if image.mode != "RGB" else image
    system_turn = {"role": "system", "content": [{"type": "text", "text": SYSTEM_PROMPT}]}
    user_turn = {
        "role": "user",
        "content": [
            {"type": "image", "image": rgb_image},
            {"type": "text", "text": question},
        ],
    }
    return [system_turn, user_turn]
156
+
157
def build_inputs(image: Image.Image, question: str):
    """Render the chat template and preprocess vision inputs into pt tensors."""
    chat = _messages(image, question)
    prompt = processor.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    vision_images, vision_videos = process_vision_info(chat)
    return processor(text=[prompt], images=vision_images, videos=vision_videos, return_tensors="pt")
162
+
163
def _pad_token_id(model):
    """Pick a pad token id: tokenizer EOS, else the model config's EOS, else 0."""
    tokenizer = getattr(processor, "tokenizer", None)
    eos_id = getattr(tokenizer, "eos_token_id", None)
    if eos_id is not None:
        return eos_id
    model_cfg = getattr(model, "config", None)
    return getattr(model_cfg, "eos_token_id", 0) or 0
166
+
167
def _generate_text(model, inputs: Dict[str, Any]) -> str:
    """Run deterministic generation and decode only the completion tokens."""
    # Generation happens on whatever device the model's parameters live on.
    target_device = next(model.parameters()).device
    batch = {
        key: (value.to(target_device) if isinstance(value, torch.Tensor) else value)
        for key, value in inputs.items()
    }
    with torch.no_grad():
        sequences = model.generate(**batch, **GEN_KW, pad_token_id=_pad_token_id(model))
    # Strip the echoed prompt so only newly generated tokens are decoded.
    completions = [seq[len(prompt):] for prompt, seq in zip(batch["input_ids"], sequences)]
    decoded = processor.batch_decode(
        completions, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )
    return decoded[0]
177
+
178
def format_derm_disclaimer(ans: str) -> str:
    """Append the standing non-medical-device disclaimer to a model answer."""
    disclaimer = (
        "_Disclaimer: This AI is not a medical device. The output is informational and may be inaccurate. "
        "Consult a qualified dermatologist for diagnosis and treatment._"
    )
    return f"{ans}\n\n---\n{disclaimer}"
185
+
186
def _load_base_plus_lora(dtype: torch.dtype = torch.float16):
    """Load the base VLM onto the GPU and attach the cached LoRA adapters.

    Returns the PEFT-wrapped model in eval mode; callers own its lifetime
    and should free it (and CUDA cache) when done.
    """
    logger.info(f"Loading BASE on GPU: {BASE_MODEL_ID}")
    backbone = VisionTextModelClass.from_pretrained(
        BASE_MODEL_ID,
        torch_dtype=dtype,
        device_map="cuda",
        trust_remote_code=True,
        low_cpu_mem_usage=True,
    )
    logger.info(f"Attaching LoRA adapters from: {_ADAPTER_LOCAL}")
    adapted = PeftModel.from_pretrained(backbone, _ADAPTER_LOCAL, is_trainable=False)
    adapted.eval()
    return adapted
199
+
200
# ---------------------------
# Inference (ZeroGPU-safe: only here we touch CUDA)
# ---------------------------
@spaces.GPU(duration=ZGPU_DURATION)
def analyze_skin_condition(image: Optional[Image.Image], question: str) -> str:
    """Single-image analysis: encode, load base+LoRA on GPU, generate, clean up.

    Returns the disclaimed answer text, or a "❌"-prefixed message on bad
    input or failure (never raises to the UI).
    """
    if image is None:
        return "❌ Please upload an image first."
    lora_model = None
    try:
        # Encode on CPU first so obviously bad inputs fail before the GPU load.
        encoded = build_inputs(image, question)
        # fp16 here; bf16 is an alternative on newer GPUs.
        lora_model = _load_base_plus_lora(dtype=torch.float16)
        answer = _generate_text(lora_model, encoded)
        return format_derm_disclaimer(answer)
    except Exception as e:
        logger.exception("Error during inference")
        return f"❌ Error analyzing image: {e}"
    finally:
        # Release the model and CUDA cache before the ZeroGPU lease ends.
        if lora_model is not None:
            del lora_model
            torch.cuda.empty_cache()
221
+
222
# ---------------------------
# Batched inference API (hidden; call via /analyze_batch)
# ---------------------------
@spaces.GPU(duration=ZGPU_DURATION)
def analyze_batch(samples: List[Dict[str, Any]]) -> List[str]:
    """
    samples: list of dicts like: {"image": <PIL/Image or filepath>, "question": <str>}
    Returns a list of responses (same order).
    """
    responses: List[str] = []
    if not isinstance(samples, list):
        return ["❌ Invalid payload: expected a JSON list of {image, question} dicts."]
    peft_model = None
    try:
        # Load the model once and reuse it for every item in the batch.
        peft_model = _load_base_plus_lora(dtype=torch.float16)
        for item in samples:
            try:
                picture = item.get("image")
                prompt = item.get("question") or "Describe this skin condition in detail and suggest possible next steps."
                # If the client sent a path (e.g., via gradio_client handle_file), load it:
                if isinstance(picture, str) and os.path.isfile(picture):
                    picture = Image.open(picture).convert("RGB")
                if not isinstance(picture, Image.Image):
                    responses.append("❌ Missing/invalid image")
                    continue
                encoded = build_inputs(picture, prompt)
                answer = _generate_text(peft_model, encoded)
                responses.append(format_derm_disclaimer(answer))
            except Exception as item_err:
                # One bad item must not sink the rest of the batch.
                logger.exception("Error on one batch item")
                responses.append(f"❌ Error analyzing one item: {item_err}")
        return responses
    except Exception as e:
        logger.exception("Batch inference failed")
        return [f"❌ Batch error: {e}"]
    finally:
        # Release the model and CUDA cache before the ZeroGPU lease ends.
        if peft_model is not None:
            del peft_model
            torch.cuda.empty_cache()
261
+
262
# ---------------------------
# UI
# ---------------------------
def create_interface() -> gr.Blocks:
    """Build the Gradio Blocks app: single-image UI plus a hidden batch API.

    Exposes two API routes: ``analyze_skin_condition`` (wired to the submit
    button) and ``analyze_batch`` (an invisible gr.Interface used only to
    register the route for gradio_client callers).
    """
    with gr.Blocks(title="Dermatology AI Assistant") as demo:
        gr.Markdown(
            "# 🩺 Dermatology AI Assistant\n"
            "Upload a skin photo and ask a question. The model will provide an informational response."
        )

        # Input row: image upload plus a free-text prompt with a sensible default.
        with gr.Row():
            image_input = gr.Image(type="pil", label="Upload Image (JPG/PNG)")
            question_input = gr.Textbox(
                label="Question / Prompt",
                value="Describe this skin condition in detail and suggest possible next steps.",
                lines=3,
            )

        with gr.Row():
            submit_btn = gr.Button("Analyze", variant="primary")
            clear_btn = gr.Button("Clear")

        output_box = gr.Textbox(label="Response", lines=16, show_copy_button=True)

        # queue=True serializes GPU requests instead of running them concurrently.
        submit_btn.click(
            fn=analyze_skin_condition,
            inputs=[image_input, question_input],
            outputs=output_box,
            queue=True,
            api_name="analyze_skin_condition",  # public API for single requests
        )
        # Clear resets both inputs; the lambda returns one value per output.
        clear_btn.click(fn=lambda: (None, ""), inputs=None, outputs=[image_input, question_input])

        # Hidden minimal iface just to expose a batch API route
        gr.Interface(
            fn=analyze_batch,
            inputs=[gr.JSON(label="samples")],
            outputs=gr.JSON(label="responses"),
            allow_flagging="never",
            api_name="analyze_batch",  # call this from gradio_client
            visible=False,  # hide in UI; keep route alive
        )

        # Enable request queuing; the trailing Markdown still renders because
        # we are inside the Blocks context.
        demo.queue()
        gr.Markdown(
            "_Tips: Ensure good lighting and focus. Avoid uploading personally identifying information._"
        )
    return demo
310
+
311
def main():
    """Build the Gradio app and serve it on all interfaces, port 7860."""
    app = create_interface()
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True,
        inbrowser=False,
        quiet=False,
        ssr_mode=False,  # no Node requirement
    )

if __name__ == "__main__":
    main()