AttrLLM

Sleeping

File size: 14,479 Bytes

3e72399

"""
Loader for ISIC dermoscopy benchmark examples.

Reads the curated 10-sample CSV and loads any precomputed attribution
results (same directory structure as the PubMedVision / MIMIC-CXR benchmarks).
"""

from __future__ import annotations

import csv
from pathlib import Path
from typing import Any, Dict, List, Optional

from .medical_loader import (
    parse_summary_txt,
    parse_vllm_summary,
    _build_all_cross_modal_pairs,
    apply_method_to_clip_summary,
    apply_method_to_vllm_summary,
    load_mobius_sidecar,
    load_vllm_result_json,
    rename_summary_patch_labels_in_place,
    rename_cross_pair_patch_labels_in_place,
)

# ── Path resolution ──────────────────────────────────────────────────────

# Absolute directory that contains this module.
_VIZ_DIR = Path(__file__).resolve().parent
# Parent of the module directory; the "results" tree used below is looked up
# relative to this path.
_PROJECT_ROOT = _VIZ_DIR.parent


def _resolve_isic_dataset_dir() -> Path:
    """Return ``<project root>/results/isic/dataset``.

    The path is built unconditionally; no existence check is performed here.
    """
    return _PROJECT_ROOT.joinpath("results", "isic", "dataset")


def _resolve_isic_results_dir(method_suffix: str = "") -> Optional[Path]:
    """Locate ``<project root>/results/isic<method_suffix>``.

    Args:
        method_suffix: Text appended to ``"isic"`` to form the directory
            name, e.g. "", "_biomedclip", "_llavamed", "_llavamed_unsam",
            "_vlm_unsam".

    Returns:
        The directory path when it exists on disk, otherwise ``None``.
    """
    candidate = _PROJECT_ROOT / "results" / f"isic{method_suffix}"
    if not candidate.exists():
        return None
    return candidate


# ── Example registry ─────────────────────────────────────────────────────

# Registry of ISIC examples keyed by example id. Declared empty here (with its
# type) and rebound further down once the curated CSV has been read.
ISIC_EXAMPLES: Dict[str, Dict[str, Any]] = {}


def _load_isic_examples_from_csv() -> Dict[str, Dict[str, Any]]:
    """Load the curated ISIC CSV (``isic_10.csv``) into a registry dict.

    Returns:
        A mapping from ``"coco_<cocoid>"`` to a metadata dict with the keys
        ``title``, ``short``, ``category``, ``abbrev``, ``caption``,
        ``img_name``, ``class_id``, ``cocoid``, ``source`` and
        ``has_results``. An empty dict is returned when the CSV is missing.

    ``has_results`` is True when a path named after the example id exists in
    any of the ISIC result trees that ``load_isic_example`` reads from.
    """
    csv_path = _resolve_isic_dataset_dir() / "isic_10.csv"
    if not csv_path.exists():
        return {}

    def _field(row: Dict[str, Any], name: str) -> str:
        # DictReader fills columns missing from a short row with None, so
        # coerce both "absent" and "None" to an empty string.
        return row.get(name) or ""

    examples: Dict[str, Dict[str, Any]] = {}
    # newline="" is what the csv module expects so that quoted fields with
    # embedded newlines are parsed correctly.
    with open(csv_path, "r", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            cocoid = _field(row, "cocoid")
            if not cocoid:
                # Without an id the row cannot be addressed; skip it rather
                # than registering a bogus "coco_" / "coco_None" entry.
                continue

            category = _field(row, "category")
            caption = _field(row, "caption")
            cap_short = caption[:60] + "..." if len(caption) > 60 else caption

            examples[f"coco_{cocoid}"] = {
                "title": category,
                "short": cap_short,
                "category": category,
                "abbrev": _field(row, "abbrev"),
                "caption": caption,
                "img_name": _field(row, "img_name"),
                "class_id": _field(row, "class_id"),
                "cocoid": cocoid,
                "source": "ISIC-2019",
                "has_results": False,
            }

    # Every results tree that load_isic_example() consults; keeping this list
    # in step with the loader avoids flagging an example as result-less when
    # only e.g. the LLaVA-Med or tok30 outputs exist.
    result_suffixes = (
        "",
        "_tok30_dotmask",
        "_tok30",
        "_biomedclip",
        "_llavamed",
        "_vlm_unsam",
        "_llavamed_unsam",
    )
    # Resolve the directories once instead of once per example.
    result_dirs = [
        rdir
        for rdir in map(_resolve_isic_results_dir, result_suffixes)
        if rdir is not None
    ]
    for eid, meta in examples.items():
        meta["has_results"] = any((rdir / eid).exists() for rdir in result_dirs)

    return examples


# Populate the registry once, at import time; this is an empty dict when the
# curated CSV is not present.
ISIC_EXAMPLES = _load_isic_examples_from_csv()


def get_isic_examples_by_category(
    category: Optional[str] = None,
) -> Dict[str, Dict[str, Any]]:
    """Return the ISIC examples belonging to one diagnostic category.

    A falsy ``category`` or the word "all" (any casing) yields the full
    registry object itself; otherwise a new dict with the case-insensitive
    matches is returned.
    """
    if not category:
        return ISIC_EXAMPLES

    wanted = category.lower()
    if wanted == "all":
        return ISIC_EXAMPLES

    matches = {}
    for example_id, meta in ISIC_EXAMPLES.items():
        if meta.get("category", "").lower() == wanted:
            matches[example_id] = meta
    return matches


def list_isic_categories() -> List[str]:
    """List the distinct, non-empty diagnostic categories in sorted order."""
    unique = set()
    for meta in ISIC_EXAMPLES.values():
        name = meta.get("category")
        if name:
            unique.add(name)
    return sorted(unique)


# ── Image loading ────────────────────────────────────────────────────────

def get_isic_image_path(example_id: str) -> Optional[str]:
    """Look up the on-disk path of an example's original dermoscopy image.

    Returns ``None`` when the example is not registered, has no ``img_name``,
    or the file is absent from the dataset's ``images`` directory.
    """
    meta = ISIC_EXAMPLES.get(example_id)
    img_name = meta.get("img_name", "") if meta else ""
    if not img_name:
        return None

    candidate = _resolve_isic_dataset_dir() / "images" / img_name
    if candidate.exists():
        return str(candidate)
    return None


# ── Result loading ───────────────────────────────────────────────────────

# VLM-style result sections read by load_isic_example(), in load order:
# (key prefix used in the returned dict, results-dir suffix,
#  whether segmap/original image paths are also recorded).
_ISIC_VLM_SECTIONS = (
    ("vllm", "", False),                            # VLM (Qwen2-VL)
    ("llavamed", "_llavamed", False),               # LLaVA-Med
    ("vlm_unsam", "_vlm_unsam", True),              # VLM UnSAM
    ("llavamed_unsam", "_llavamed_unsam", True),    # LLaVA-Med UnSAM
)


def _load_isic_clip_section(
    data: Dict[str, Any], data_key: str, edir: Path, method: str,
) -> None:
    """Fill ``data[data_key]`` from one CLIP-style example directory (no-op without summary.txt)."""
    import base64  # local import: the module does not import base64 at top level

    summary_path = edir / "summary.txt"
    if not summary_path.exists():
        return

    summary = parse_summary_txt(summary_path)
    mobius = load_mobius_sidecar(edir)
    apply_method_to_clip_summary(summary, mobius, method)
    rename_summary_patch_labels_in_place(summary)

    image_files = {
        key: edir / f"{key}.png" for key in ("original", "overlay", "segmap")
    }
    data[f"has_{data_key}"] = True
    data["has_mobius"][data_key] = mobius is not None
    section = data[data_key] = {
        "summary": summary,
        "mobius_sidecar": mobius,
        "image_paths": {
            key: str(path) if path.exists() else ""
            for key, path in image_files.items()
        },
        "image_b64": {},
    }
    # The pair builder is handed the section while "image_b64" is still
    # empty; the encoded images are attached only afterwards.
    section["all_cross_modal_pairs"] = _build_all_cross_modal_pairs(
        section, mobius_sidecar=mobius, method=method,
    )
    rename_cross_pair_patch_labels_in_place(section["all_cross_modal_pairs"])
    for key, path in image_files.items():
        if path.exists():
            encoded = base64.b64encode(path.read_bytes()).decode("ascii")
            section.setdefault("image_b64", {})[key] = encoded


def _load_isic_vlm_section(
    data: Dict[str, Any],
    edir: Path,
    key_prefix: str,
    method: str,
    *,
    record_unsam_paths: bool = False,
) -> None:
    """Fill the ``<key_prefix>_logprob`` / ``<key_prefix>_gen`` entries of ``data`` from ``edir``."""
    for kind in ("logprob", "gen"):
        prefix = f"vllm_{kind}"  # on-disk file prefix is the same for every model
        json_key = f"{key_prefix}_{kind}"

        summary_path = edir / f"{prefix}_summary.txt"
        if not summary_path.exists():
            continue
        parsed = parse_vllm_summary(summary_path)
        if not parsed:
            continue

        json_data = load_vllm_result_json(edir, prefix, method=method)
        apply_method_to_vllm_summary(parsed, json_data, method)
        data[f"has_{json_key}"] = True
        data[json_key] = parsed
        data[f"{json_key}_json"] = json_data
        data["has_mobius"][json_key] = bool(json_data.get("mobius_dict"))

        overlay = edir / f"{prefix}_overlay.png"
        if overlay.exists():
            data[json_key]["overlay_path"] = str(overlay)

    if record_unsam_paths:
        for name in ("segmap", "original"):
            image = edir / f"{name}.png"
            if image.exists():
                data[f"{key_prefix}_{name}_path"] = str(image)


def load_isic_example(example_id: str, *, method: str = "shapley") -> Dict[str, Any]:
    """Load all available precomputed results for an ISIC example.

    Returns the same dict shape as load_mimic_example()/load_benchmark_example()
    so the UI handler can reuse the same logic.

    Args:
        example_id: Registry key of the example; also the name of the
            per-example subdirectory inside each results tree.
        method: Attribution method name forwarded to the ``medical_loader``
            helpers.

    Returns:
        A dict that always holds the example metadata, caption, original
        image path and one ``has_*`` flag per result family (all False by
        default). For every family whose summary file exists on disk the
        flag is set to True and the parsed payload is stored under the
        matching key (``clip``, ``biomedclip``, ``vllm_logprob``, ...).
    """
    meta = ISIC_EXAMPLES.get(example_id, {})
    caption = meta.get("caption", "")

    data: Dict[str, Any] = {
        "example_id": example_id,
        "meta": meta,
        "caption": caption,
        "findings": "",  # ISIC has no separate findings field
        "method": method,
        "original_image_path": get_isic_image_path(example_id),
        "has_mobius": {},
        "has_clip": False,
        "has_biomedclip": False,
        "has_vllm_logprob": False,
        "has_vllm_gen": False,
        "has_llavamed_logprob": False,
        "has_llavamed_gen": False,
        "has_vlm_unsam_logprob": False,
        "has_vlm_unsam_gen": False,
        "has_llavamed_unsam_logprob": False,
        "has_llavamed_unsam_gen": False,
    }

    # ── CLIP cross-modal ─────────────────────────────────────────────
    # Use the first results tree that exists, in order of preference.
    clip_dir = (
        _resolve_isic_results_dir("_tok30_dotmask")
        or _resolve_isic_results_dir("_tok30")
        or _resolve_isic_results_dir()
    )
    if clip_dir:
        _load_isic_clip_section(data, "clip", clip_dir / example_id, method)

    # ── BiomedCLIP cross-modal ───────────────────────────────────────
    bc_dir = _resolve_isic_results_dir("_biomedclip")
    if bc_dir:
        _load_isic_clip_section(data, "biomedclip", bc_dir / example_id, method)

    # ── VLM / LLaVA-Med / UnSAM variants (logprob + gen each) ────────
    for key_prefix, dir_suffix, record_unsam_paths in _ISIC_VLM_SECTIONS:
        results_dir = _resolve_isic_results_dir(dir_suffix)
        if results_dir:
            _load_isic_vlm_section(
                data,
                results_dir / example_id,
                key_prefix,
                method,
                record_unsam_paths=record_unsam_paths,
            )

    return data