AttrLLM / visualization /isic_loader.py
Qingpeng Kong
clean initial state
3e72399
"""
Loader for ISIC dermoscopy benchmark examples.
Reads the curated 10-sample CSV and loads any precomputed attribution
results (same directory structure as the PubMedVision / MIMIC-CXR benchmarks).
"""
from __future__ import annotations
import csv
from pathlib import Path
from typing import Any, Dict, List, Optional
from .medical_loader import (
parse_summary_txt,
parse_vllm_summary,
_build_all_cross_modal_pairs,
apply_method_to_clip_summary,
apply_method_to_vllm_summary,
load_mobius_sidecar,
load_vllm_result_json,
rename_summary_patch_labels_in_place,
rename_cross_pair_patch_labels_in_place,
)
# ── Path resolution ──────────────────────────────────────────────────────
# Directory containing this module, resolved to an absolute path so lookups
# do not depend on the current working directory.
_VIZ_DIR = Path(__file__).resolve().parent
# Parent of the module directory; the "results/..." trees used below are
# resolved relative to it.
_PROJECT_ROOT = _VIZ_DIR.parent
def _resolve_isic_dataset_dir() -> Path:
    """Return ``<project root>/results/isic/dataset``.

    The path is built unconditionally; no existence check is performed here.
    """
    return _PROJECT_ROOT.joinpath("results", "isic", "dataset")
def _resolve_isic_results_dir(method_suffix: str = "") -> Optional[Path]:
    """Locate the ``results/isic<method_suffix>`` directory under the project root.

    Args:
        method_suffix: Variant suffix appended to ``isic``, e.g. ``""``,
            ``"_biomedclip"``, ``"_llavamed"``, ``"_llavamed_unsam"``,
            ``"_vlm_unsam"``.

    Returns:
        The resolved path when it exists on disk, otherwise ``None``.
    """
    candidate = _PROJECT_ROOT.joinpath("results", f"isic{method_suffix}")
    if not candidate.exists():
        return None
    return candidate
# ── Example registry ─────────────────────────────────────────────────────
# Registry of curated ISIC examples, keyed by example id ("coco_<cocoid>").
# Declared empty here (with its type annotation) and rebound to the loaded
# CSV contents right after _load_isic_examples_from_csv() is defined.
ISIC_EXAMPLES: Dict[str, Dict[str, Any]] = {}
# Every results-directory suffix that load_isic_example() reads from. The
# "has_results" flag must agree with what the loader can actually find, so the
# probe covers all of them rather than a subset.
_ISIC_RESULT_SUFFIXES = (
    "",
    "_tok30",
    "_tok30_dotmask",
    "_biomedclip",
    "_llavamed",
    "_vlm_unsam",
    "_llavamed_unsam",
)


def _load_isic_examples_from_csv() -> Dict[str, Dict[str, Any]]:
    """Load the curated ISIC CSV (``isic_10.csv``) into a registry dict.

    Each row becomes an entry keyed by ``"coco_<cocoid>"`` carrying the row's
    category, abbreviation, caption (plus a 60-character shortened form),
    image file name and class id. ``has_results`` is set to ``True`` when a
    directory named after the example id exists in any ISIC results directory.

    Returns:
        The registry dict; empty when the CSV file does not exist.
    """
    csv_path = _resolve_isic_dataset_dir() / "isic_10.csv"
    if not csv_path.exists():
        return {}

    examples: Dict[str, Dict[str, Any]] = {}
    # newline="" lets the csv module handle quoted fields containing line
    # breaks (as the csv docs require); "utf-8-sig" reads plain UTF-8 and also
    # strips a leading BOM that would otherwise corrupt the first header name.
    with open(csv_path, "r", encoding="utf-8-sig", newline="") as f:
        for row in csv.DictReader(f):
            # DictReader fills missing trailing cells with None, so use
            # ``or ""`` rather than a .get() default, which only covers
            # absent keys.
            cocoid = row.get("cocoid")
            if not cocoid:
                # A row without an id cannot be addressed; skip it instead of
                # failing the whole module import.
                continue
            category = row.get("category") or ""
            caption = row.get("caption") or ""
            cap_short = caption[:60] + "..." if len(caption) > 60 else caption
            examples[f"coco_{cocoid}"] = {
                "title": category,
                "short": cap_short,
                "category": category,
                "abbrev": row.get("abbrev") or "",
                "caption": caption,
                "img_name": row.get("img_name") or "",
                "class_id": row.get("class_id") or "",
                "cocoid": cocoid,
                "source": "ISIC-2019",
                "has_results": False,
            }

    # Resolve the result directories once instead of once per example.
    result_dirs = [
        rdir
        for rdir in map(_resolve_isic_results_dir, _ISIC_RESULT_SUFFIXES)
        if rdir is not None
    ]
    for eid, meta in examples.items():
        meta["has_results"] = any((rdir / eid).exists() for rdir in result_dirs)
    return examples
# Populate the registry once, at import time. The loader returns {} when the
# curated CSV is not present, so the registry is then simply empty.
ISIC_EXAMPLES = _load_isic_examples_from_csv()
def get_isic_examples_by_category(
    category: Optional[str] = None,
) -> Dict[str, Dict[str, Any]]:
    """Return the ISIC examples belonging to one diagnostic category.

    Args:
        category: Category name, compared case-insensitively. ``None``, an
            empty string, or ``"all"`` selects every example.

    Returns:
        The module-level registry itself when no filtering is requested,
        otherwise a new dict holding only the matching entries.
    """
    wanted = category.lower() if category else ""
    if wanted in ("", "all"):
        return ISIC_EXAMPLES

    matches: Dict[str, Dict[str, Any]] = {}
    for example_id, meta in ISIC_EXAMPLES.items():
        if meta.get("category", "").lower() == wanted:
            matches[example_id] = meta
    return matches
def list_isic_categories() -> List[str]:
    """Return the distinct, non-empty diagnostic categories in sorted order."""
    unique = set()
    for meta in ISIC_EXAMPLES.values():
        label = meta.get("category")
        if label:
            unique.add(label)
    return sorted(unique)
# ── Image loading ────────────────────────────────────────────────────────
def get_isic_image_path(example_id: str) -> Optional[str]:
    """Return the on-disk path of an example's original dermoscopy image.

    Args:
        example_id: Registry key of the example.

    Returns:
        The image path as a string, or ``None`` when the example is unknown,
        has no ``img_name``, or the file is missing from the dataset's
        ``images`` directory.
    """
    entry = ISIC_EXAMPLES.get(example_id)
    file_name = entry.get("img_name", "") if entry else ""
    if not file_name:
        return None

    candidate = _resolve_isic_dataset_dir() / "images" / file_name
    if candidate.exists():
        return str(candidate)
    return None
# ── Result loading ───────────────────────────────────────────────────────
# File stems of the PNGs that accompany a CLIP-style result directory.
_ISIC_IMAGE_KEYS = ("original", "overlay", "segmap")

# One row per VLM result family:
#   (results-dir suffix, key stem used in the output dict, export segmap/original paths?)
# Output keys are derived as "<stem>_logprob" / "<stem>_gen", "has_<stem>_...",
# "<stem>_..._json" and, for the UnSAM families, "<stem>_segmap_path" /
# "<stem>_original_path".
_ISIC_VLM_FAMILIES = (
    ("", "vllm", False),                       # VLM (Qwen2-VL)
    ("_llavamed", "llavamed", False),          # LLaVA-Med
    ("_vlm_unsam", "vlm_unsam", True),         # VLM UnSAM
    ("_llavamed_unsam", "llavamed_unsam", True),  # LLaVA-Med UnSAM
)


def _load_isic_clip_result(edir: Path, method: str) -> Optional[Dict[str, Any]]:
    """Load one CLIP-style result bundle from *edir*; ``None`` without ``summary.txt``."""
    import base64

    summary_path = edir / "summary.txt"
    if not summary_path.exists():
        return None

    summary = parse_summary_txt(summary_path)
    sidecar = load_mobius_sidecar(edir)
    apply_method_to_clip_summary(summary, sidecar, method)
    rename_summary_patch_labels_in_place(summary)

    # Only the PNGs that are actually present on disk.
    found: Dict[str, Path] = {}
    for key in _ISIC_IMAGE_KEYS:
        png = edir / f"{key}.png"
        if png.exists():
            found[key] = png

    bundle: Dict[str, Any] = {
        "summary": summary,
        "mobius_sidecar": sidecar,
        # Missing images are reported as empty strings.
        "image_paths": {
            key: str(found[key]) if key in found else ""
            for key in _ISIC_IMAGE_KEYS
        },
        "image_b64": {},
    }
    # The cross-modal pairs are built from the bundle before the base64
    # payloads are attached, matching the original statement order.
    bundle["all_cross_modal_pairs"] = _build_all_cross_modal_pairs(
        bundle, mobius_sidecar=sidecar, method=method,
    )
    rename_cross_pair_patch_labels_in_place(bundle["all_cross_modal_pairs"])

    for key, png in found.items():
        with open(png, "rb") as f:
            bundle.setdefault("image_b64", {})[key] = (
                base64.b64encode(f.read()).decode("ascii")
            )
    return bundle


def _load_isic_vlm_results(
    data: Dict[str, Any], edir: Path, key_stem: str, method: str,
) -> None:
    """Fill *data* with the ``vllm_logprob`` / ``vllm_gen`` results found in *edir*."""
    for kind in ("logprob", "gen"):
        prefix = f"vllm_{kind}"
        summary_path = edir / f"{prefix}_summary.txt"
        if not summary_path.exists():
            continue
        parsed = parse_vllm_summary(summary_path)
        if not parsed:
            continue

        json_data = load_vllm_result_json(edir, prefix, method=method)
        apply_method_to_vllm_summary(parsed, json_data, method)

        json_key = f"{key_stem}_{kind}"
        data[f"has_{json_key}"] = True
        data[json_key] = parsed
        data[f"{json_key}_json"] = json_data
        # NOTE(review): guarded in case the loader can return None when the
        # JSON file is absent — confirm against load_vllm_result_json.
        data["has_mobius"][json_key] = bool(
            json_data and json_data.get("mobius_dict")
        )

        overlay = edir / f"{prefix}_overlay.png"
        if overlay.exists():
            parsed["overlay_path"] = str(overlay)


def load_isic_example(example_id: str, *, method: str = "shapley") -> Dict[str, Any]:
    """Load all available precomputed results for an ISIC example.

    The returned dict is intended to have the same shape as the one produced
    by load_mimic_example()/load_benchmark_example() so the UI handler can
    reuse the same logic.

    Args:
        example_id: Registry key of the example (also the name of its
            per-example directory inside each results directory).
        method: Attribution method name forwarded to the summary/JSON helpers.

    Returns:
        A dict with the example metadata, the ``has_*`` availability flags
        (all ``False`` unless the corresponding result was found), and one
        entry per result family that exists on disk.
    """
    meta = ISIC_EXAMPLES.get(example_id, {})
    data: Dict[str, Any] = {
        "example_id": example_id,
        "meta": meta,
        "caption": meta.get("caption", ""),
        "findings": "",  # ISIC has no separate findings field
        "method": method,
        "original_image_path": get_isic_image_path(example_id),
        "has_mobius": {},
        "has_clip": False,
        "has_biomedclip": False,
        "has_vllm_logprob": False,
        "has_vllm_gen": False,
        "has_llavamed_logprob": False,
        "has_llavamed_gen": False,
        "has_vlm_unsam_logprob": False,
        "has_vlm_unsam_gen": False,
        "has_llavamed_unsam_logprob": False,
        "has_llavamed_unsam_gen": False,
    }

    # ── CLIP / BiomedCLIP cross-modal ────────────────────────────────
    # CLIP uses the first results directory that exists, in this preference
    # order; the choice is per directory, not per example.
    clip_dir = (
        _resolve_isic_results_dir("_tok30_dotmask")
        or _resolve_isic_results_dir("_tok30")
        or _resolve_isic_results_dir()
    )
    for key, rdir in (
        ("clip", clip_dir),
        ("biomedclip", _resolve_isic_results_dir("_biomedclip")),
    ):
        if rdir is None:
            continue
        bundle = _load_isic_clip_result(rdir / example_id, method)
        if bundle is None:
            continue
        data[f"has_{key}"] = True
        data["has_mobius"][key] = bundle["mobius_sidecar"] is not None
        data[key] = bundle

    # ── VLM families (logprob + gen) ─────────────────────────────────
    for suffix, key_stem, has_segmentation in _ISIC_VLM_FAMILIES:
        rdir = _resolve_isic_results_dir(suffix)
        if rdir is None:
            continue
        edir = rdir / example_id
        _load_isic_vlm_results(data, edir, key_stem, method)
        if has_segmentation:
            # UnSAM runs additionally expose their segmentation map and the
            # image it was computed on.
            for name in ("segmap", "original"):
                png = edir / f"{name}.png"
                if png.exists():
                    data[f"{key_stem}_{name}_path"] = str(png)

    return data