AttrLLM

Sleeping

Qingpeng Kong

clean initial state

3e72399 about 1 month ago

14.5 kB

	"""
	Loader for ISIC dermoscopy benchmark examples.

	Reads the curated 10-sample CSV and loads any precomputed attribution
	results (same directory structure as the PubMedVision / MIMIC-CXR benchmarks).
	"""

	from __future__ import annotations

	import csv
	from pathlib import Path
	from typing import Any, Dict, List, Optional

	from .medical_loader import (
	parse_summary_txt,
	parse_vllm_summary,
	_build_all_cross_modal_pairs,
	apply_method_to_clip_summary,
	apply_method_to_vllm_summary,
	load_mobius_sidecar,
	load_vllm_result_json,
	rename_summary_patch_labels_in_place,
	rename_cross_pair_patch_labels_in_place,
	)

	# ── Path resolution ──────────────────────────────────────────────────────

	# Directory that contains this module.
	_VIZ_DIR = Path(__file__).resolve().parent
	# Parent of that directory; the "results" tree is resolved relative to it.
	_PROJECT_ROOT = _VIZ_DIR.parent


	def _resolve_isic_dataset_dir() -> Path:
	    """Return the ISIC dataset directory, ``<project root>/results/isic/dataset``.

	    The path is built unconditionally; callers check for existence themselves.
	    """
	    return _PROJECT_ROOT.joinpath("results", "isic", "dataset")


	def _resolve_isic_results_dir(method_suffix: str = "") -> Optional[Path]:
	    """Locate the results directory ``results/isic<method_suffix>``.

	    Args:
	        method_suffix: Text appended to ``isic`` to form the directory name,
	            e.g. "", "_biomedclip", "_llavamed", "_llavamed_unsam", "_vlm_unsam".

	    Returns:
	        The directory path if it exists on disk, otherwise None.
	    """
	    candidate = _PROJECT_ROOT.joinpath("results", f"isic{method_suffix}")
	    if not candidate.exists():
	        return None
	    return candidate


	# ── Example registry ─────────────────────────────────────────────────────

	# Registry of ISIC examples keyed by example id. It starts empty here and is
	# replaced by the CSV-backed registry further down, once the loader is defined.
	ISIC_EXAMPLES: Dict[str, Dict[str, Any]] = {}


	def _load_isic_examples_from_csv() -> Dict[str, Dict[str, Any]]:
	    """Load the ISIC curated CSV into a registry dict.

	    Reads ``isic_10.csv`` from the ISIC dataset directory and creates one entry
	    per row, keyed by ``coco_<cocoid>``.

	    Returns:
	        Mapping of example id to a metadata dict with the keys ``title``,
	        ``short``, ``category``, ``abbrev``, ``caption``, ``img_name``,
	        ``class_id``, ``cocoid``, ``source`` and ``has_results``. Empty when
	        the CSV file does not exist.

	    Raises:
	        KeyError: If the CSV header has no ``cocoid`` column.
	    """
	    csv_path = _resolve_isic_dataset_dir() / "isic_10.csv"
	    if not csv_path.exists():
	        return {}

	    def _cell(row: Dict[str, Any], key: str) -> str:
	        # DictReader stores None for cells missing from a short row, which
	        # ``row.get(key, "")`` would pass through; normalise those to "".
	        return row.get(key) or ""

	    examples: Dict[str, Dict[str, Any]] = {}
	    # newline="" is what the csv module asks for, so that newlines embedded in
	    # quoted fields (e.g. multi-line captions) are parsed correctly.
	    with open(csv_path, "r", encoding="utf-8", newline="") as f:
	        for row in csv.DictReader(f):
	            cocoid = row["cocoid"]
	            if not cocoid:
	                # A row without an id cannot be addressed; skip it instead of
	                # registering a bogus "coco_" / "coco_None" entry.
	                continue

	            category = _cell(row, "category")
	            caption = _cell(row, "caption")
	            # Short label: first 60 characters plus an ellipsis when truncated.
	            cap_short = caption[:60] + "..." if len(caption) > 60 else caption

	            examples[f"coco_{cocoid}"] = {
	                "title": category,
	                "short": cap_short,
	                "category": category,
	                "abbrev": _cell(row, "abbrev"),
	                "caption": caption,
	                "img_name": _cell(row, "img_name"),
	                "class_id": _cell(row, "class_id"),
	                "cocoid": cocoid,
	                "source": "ISIC-2019",
	                "has_results": False,
	            }

	    # Resolve the results directories once rather than once per example.
	    # NOTE(review): load_isic_example also reads "_tok30_dotmask", "_tok30",
	    # "_llavamed" and "_vlm_unsam" directories; confirm whether results found
	    # only there should also set has_results.
	    results_dirs = [
	        rdir
	        for rdir in map(_resolve_isic_results_dir, ("", "_biomedclip", "_llavamed_unsam"))
	        if rdir is not None
	    ]
	    for eid, meta in examples.items():
	        meta["has_results"] = any((rdir / eid).exists() for rdir in results_dirs)

	    return examples


	# Populate the registry at import time; it stays empty if the CSV is missing.
	ISIC_EXAMPLES = _load_isic_examples_from_csv()


	def get_isic_examples_by_category(
	    category: Optional[str] = None,
	) -> Dict[str, Dict[str, Any]]:
	    """Select the ISIC examples that belong to one diagnostic category.

	    Args:
	        category: Category name, compared case-insensitively. ``None``, an
	            empty string, or "all" (any casing) selects every example.

	    Returns:
	        The registry itself when no filtering is requested, otherwise a new
	        dict holding only the matching entries.
	    """
	    if category:
	        wanted = category.lower()
	        if wanted != "all":
	            selected = {}
	            for example_id, meta in ISIC_EXAMPLES.items():
	                if meta.get("category", "").lower() == wanted:
	                    selected[example_id] = meta
	            return selected
	    return ISIC_EXAMPLES


	def list_isic_categories() -> List[str]:
	    """Return the distinct, non-empty diagnostic categories in sorted order."""
	    unique = set()
	    for meta in ISIC_EXAMPLES.values():
	        name = meta.get("category")
	        if name:
	            unique.add(name)
	    return sorted(unique)


	# ── Image loading ────────────────────────────────────────────────────────

	def get_isic_image_path(example_id: str) -> Optional[str]:
	    """Look up the on-disk path of an example's original dermoscopy image.

	    Returns:
	        The path as a string, or None when the example is unknown, has no
	        ``img_name``, or the file is absent from ``<dataset dir>/images``.
	    """
	    meta = ISIC_EXAMPLES.get(example_id)
	    img_name = meta.get("img_name", "") if meta else ""
	    if not img_name:
	        return None
	    candidate = _resolve_isic_dataset_dir() / "images" / img_name
	    if candidate.exists():
	        return str(candidate)
	    return None


	# ── Result loading ───────────────────────────────────────────────────────

	def _load_isic_clip_results(edir: Path, method: str) -> Optional[Dict[str, Any]]:
	    """Load the CLIP-style cross-modal results stored in one example directory.

	    Returns None when ``edir`` has no ``summary.txt``; otherwise a dict with the
	    keys ``summary``, ``mobius_sidecar``, ``image_paths``, ``image_b64`` and
	    ``all_cross_modal_pairs``.
	    """
	    import base64

	    summary_path = edir / "summary.txt"
	    if not summary_path.exists():
	        return None

	    summary = parse_summary_txt(summary_path)
	    mobius = load_mobius_sidecar(edir)
	    apply_method_to_clip_summary(summary, mobius, method)
	    rename_summary_patch_labels_in_place(summary)

	    image_files = {key: edir / f"{key}.png" for key in ("original", "overlay", "segmap")}
	    result: Dict[str, Any] = {
	        "summary": summary,
	        "mobius_sidecar": mobius,
	        # Missing images are reported as "" rather than being left out.
	        "image_paths": {
	            key: str(path) if path.exists() else "" for key, path in image_files.items()
	        },
	        "image_b64": {},
	    }
	    result["all_cross_modal_pairs"] = _build_all_cross_modal_pairs(
	        result, mobius_sidecar=mobius, method=method,
	    )
	    rename_cross_pair_patch_labels_in_place(result["all_cross_modal_pairs"])

	    # The base64 payloads are attached only after the cross-modal pairs have
	    # been built, so the pair builder sees an empty "image_b64" as it did before.
	    for key, path in image_files.items():
	        if path.exists():
	            result["image_b64"][key] = base64.b64encode(path.read_bytes()).decode("ascii")
	    return result


	def _load_isic_vlm_results(
	    data: Dict[str, Any],
	    edir: Path,
	    family: str,
	    method: str,
	    *,
	    with_segmap: bool = False,
	) -> None:
	    """Merge the ``vllm_logprob`` / ``vllm_gen`` results found in ``edir`` into ``data``.

	    Results are stored under ``<family>_logprob`` / ``<family>_gen`` together with
	    the matching ``has_<...>`` flag, ``<...>_json`` payload and ``has_mobius``
	    entry. With ``with_segmap`` the directory's ``segmap.png`` / ``original.png``
	    paths are recorded as ``<family>_segmap_path`` / ``<family>_original_path``
	    when those files exist, whether or not any summary was loaded.
	    """
	    for prefix, kind in (("vllm_logprob", "logprob"), ("vllm_gen", "gen")):
	        summary_path = edir / f"{prefix}_summary.txt"
	        if not summary_path.exists():
	            continue
	        parsed = parse_vllm_summary(summary_path)
	        if not parsed:
	            continue

	        json_key = f"{family}_{kind}"
	        json_data = load_vllm_result_json(edir, prefix, method=method)
	        apply_method_to_vllm_summary(parsed, json_data, method)
	        data[f"has_{json_key}"] = True
	        data[json_key] = parsed
	        data[f"{json_key}_json"] = json_data
	        data["has_mobius"][json_key] = bool(json_data.get("mobius_dict"))

	        overlay = edir / f"{prefix}_overlay.png"
	        if overlay.exists():
	            parsed["overlay_path"] = str(overlay)

	    if with_segmap:
	        for name in ("segmap", "original"):
	            image = edir / f"{name}.png"
	            if image.exists():
	                data[f"{family}_{name}_path"] = str(image)


	def load_isic_example(example_id: str, *, method: str = "shapley") -> Dict[str, Any]:
	    """Load all available precomputed results for an ISIC example.

	    The result is intended to have the same dict shape as
	    load_mimic_example()/load_benchmark_example() so the UI handler can reuse
	    the same logic.

	    Args:
	        example_id: Registry key of the example; also the name of its
	            sub-directory inside each results directory.
	        method: Attribution method name forwarded to the summary helpers.

	    Returns:
	        Dict holding the example metadata, one ``has_*`` flag per result
	        family, and the loaded payload for every family whose files exist.
	        An unknown ``example_id`` yields empty metadata and an empty caption.
	    """
	    meta = ISIC_EXAMPLES.get(example_id, {})

	    data: Dict[str, Any] = {
	        "example_id": example_id,
	        "meta": meta,
	        "caption": meta.get("caption", ""),
	        "findings": "",  # ISIC has no separate findings field
	        "method": method,
	        "original_image_path": get_isic_image_path(example_id),
	        "has_mobius": {},
	        "has_clip": False,
	        "has_biomedclip": False,
	        "has_vllm_logprob": False,
	        "has_vllm_gen": False,
	        "has_llavamed_logprob": False,
	        "has_llavamed_gen": False,
	        "has_vlm_unsam_logprob": False,
	        "has_vlm_unsam_gen": False,
	        "has_llavamed_unsam_logprob": False,
	        "has_llavamed_unsam_gen": False,
	    }

	    # ── CLIP / BiomedCLIP cross-modal ────────────────────────────────
	    # CLIP results are taken from the first directory that exists, in this
	    # order of preference: "_tok30_dotmask", "_tok30", then the plain one.
	    clip_dir = (
	        _resolve_isic_results_dir("_tok30_dotmask")
	        or _resolve_isic_results_dir("_tok30")
	        or _resolve_isic_results_dir()
	    )
	    cross_modal_dirs = (
	        ("clip", clip_dir),
	        ("biomedclip", _resolve_isic_results_dir("_biomedclip")),
	    )
	    for key, results_dir in cross_modal_dirs:
	        if results_dir is None:
	            continue
	        loaded = _load_isic_clip_results(results_dir / example_id, method)
	        if loaded is None:
	            continue
	        data[f"has_{key}"] = True
	        data["has_mobius"][key] = loaded["mobius_sidecar"] is not None
	        data[key] = loaded

	    # ── VLM families (logprob + gen) ─────────────────────────────────
	    # (results-dir suffix, key stem, also record segmap/original paths)
	    vlm_families = (
	        ("", "vllm", False),                        # VLM (Qwen2-VL)
	        ("_llavamed", "llavamed", False),           # LLaVA-Med
	        ("_vlm_unsam", "vlm_unsam", True),          # VLM UnSAM
	        ("_llavamed_unsam", "llavamed_unsam", True),  # LLaVA-Med UnSAM
	    )
	    for suffix, family, with_segmap in vlm_families:
	        results_dir = _resolve_isic_results_dir(suffix)
	        if results_dir is None:
	            continue
	        _load_isic_vlm_results(
	            data, results_dir / example_id, family, method, with_segmap=with_segmap,
	        )

	    return data