""" Loader for ISIC dermoscopy benchmark examples. Reads the curated 10-sample CSV and loads any precomputed attribution results (same directory structure as the PubMedVision / MIMIC-CXR benchmarks). """ from __future__ import annotations import csv from pathlib import Path from typing import Any, Dict, List, Optional from .medical_loader import ( parse_summary_txt, parse_vllm_summary, _build_all_cross_modal_pairs, apply_method_to_clip_summary, apply_method_to_vllm_summary, load_mobius_sidecar, load_vllm_result_json, rename_summary_patch_labels_in_place, rename_cross_pair_patch_labels_in_place, ) # ── Path resolution ────────────────────────────────────────────────────── _VIZ_DIR = Path(__file__).resolve().parent _PROJECT_ROOT = _VIZ_DIR.parent def _resolve_isic_dataset_dir() -> Path: return _PROJECT_ROOT / "results" / "isic" / "dataset" def _resolve_isic_results_dir(method_suffix: str = "") -> Optional[Path]: """Resolve an ISIC results directory. method_suffix examples: "", "_biomedclip", "_llavamed", "_llavamed_unsam", "_vlm_unsam" """ d = _PROJECT_ROOT / "results" / f"isic{method_suffix}" return d if d.exists() else None # ── Example registry ───────────────────────────────────────────────────── ISIC_EXAMPLES: Dict[str, Dict[str, Any]] = {} def _load_isic_examples_from_csv() -> Dict[str, Dict[str, Any]]: """Load the ISIC curated CSV into a registry dict.""" csv_path = _resolve_isic_dataset_dir() / "isic_10.csv" if not csv_path.exists(): return {} examples = {} with open(csv_path, "r", encoding="utf-8") as f: reader = csv.DictReader(f) for row in reader: cocoid = row["cocoid"] example_id = f"coco_{cocoid}" category = row.get("category", "") abbrev = row.get("abbrev", "") caption = row.get("caption", "") cap_short = caption[:60] + "..." if len(caption) > 60 else caption examples[example_id] = { "title": category, "short": cap_short, "category": category, "abbrev": abbrev, "caption": caption, "img_name": row.get("img_name", ""), "class_id": row.get("class_id", ""), "cocoid": cocoid, "source": "ISIC-2019", "has_results": False, } for eid, meta in examples.items(): for suffix in ["", "_biomedclip", "_llavamed_unsam"]: rdir = _resolve_isic_results_dir(suffix) if rdir and (rdir / eid).exists(): meta["has_results"] = True break return examples ISIC_EXAMPLES = _load_isic_examples_from_csv() def get_isic_examples_by_category( category: Optional[str] = None, ) -> Dict[str, Dict[str, Any]]: """Filter ISIC examples by diagnostic category.""" if not category or category.lower() == "all": return ISIC_EXAMPLES return { k: v for k, v in ISIC_EXAMPLES.items() if v.get("category", "").lower() == category.lower() } def list_isic_categories() -> List[str]: """Return sorted list of unique diagnostic categories.""" cats = sorted({v["category"] for v in ISIC_EXAMPLES.values() if v.get("category")}) return cats # ── Image loading ──────────────────────────────────────────────────────── def get_isic_image_path(example_id: str) -> Optional[str]: """Return the path to the original dermoscopy image.""" meta = ISIC_EXAMPLES.get(example_id) if not meta: return None img_name = meta.get("img_name", "") if not img_name: return None img_path = _resolve_isic_dataset_dir() / "images" / img_name return str(img_path) if img_path.exists() else None # ── Result loading ─────────────────────────────────────────────────────── def load_isic_example(example_id: str, *, method: str = "shapley") -> Dict[str, Any]: """Load all available precomputed results for an ISIC example. Returns the same dict shape as load_mimic_example()/load_benchmark_example() so the UI handler can reuse the same logic. """ meta = ISIC_EXAMPLES.get(example_id, {}) caption = meta.get("caption", "") data: Dict[str, Any] = { "example_id": example_id, "meta": meta, "caption": caption, "findings": "", # ISIC has no separate findings field "method": method, "original_image_path": get_isic_image_path(example_id), "has_mobius": {}, "has_clip": False, "has_biomedclip": False, "has_vllm_logprob": False, "has_vllm_gen": False, "has_llavamed_logprob": False, "has_llavamed_gen": False, "has_vlm_unsam_logprob": False, "has_vlm_unsam_gen": False, "has_llavamed_unsam_logprob": False, "has_llavamed_unsam_gen": False, } # ── CLIP cross-modal ───────────────────────────────────────────── clip_dir = _resolve_isic_results_dir("_tok30_dotmask") if not clip_dir: clip_dir = _resolve_isic_results_dir("_tok30") if not clip_dir: clip_dir = _resolve_isic_results_dir() if clip_dir: edir = clip_dir / example_id summary_path = edir / "summary.txt" if summary_path.exists(): summary = parse_summary_txt(summary_path) clip_mobius = load_mobius_sidecar(edir) apply_method_to_clip_summary(summary, clip_mobius, method) rename_summary_patch_labels_in_place(summary) data["has_clip"] = True data["has_mobius"]["clip"] = clip_mobius is not None data["clip"] = { "summary": summary, "mobius_sidecar": clip_mobius, "image_paths": { "original": str(edir / "original.png") if (edir / "original.png").exists() else "", "overlay": str(edir / "overlay.png") if (edir / "overlay.png").exists() else "", "segmap": str(edir / "segmap.png") if (edir / "segmap.png").exists() else "", }, "image_b64": {}, } data["clip"]["all_cross_modal_pairs"] = _build_all_cross_modal_pairs( data["clip"], mobius_sidecar=clip_mobius, method=method, ) rename_cross_pair_patch_labels_in_place(data["clip"]["all_cross_modal_pairs"]) for key in ("original", "overlay", "segmap"): fpath = edir / f"{key}.png" if fpath.exists(): import base64 with open(fpath, "rb") as f: data["clip"].setdefault("image_b64", {})[key] = base64.b64encode(f.read()).decode("ascii") # ── BiomedCLIP cross-modal ─────────────────────────────────────── bc_dir = _resolve_isic_results_dir("_biomedclip") if bc_dir: edir = bc_dir / example_id summary_path = edir / "summary.txt" if summary_path.exists(): summary = parse_summary_txt(summary_path) bc_mobius = load_mobius_sidecar(edir) apply_method_to_clip_summary(summary, bc_mobius, method) rename_summary_patch_labels_in_place(summary) data["has_biomedclip"] = True data["has_mobius"]["biomedclip"] = bc_mobius is not None data["biomedclip"] = { "summary": summary, "mobius_sidecar": bc_mobius, "image_paths": { "original": str(edir / "original.png") if (edir / "original.png").exists() else "", "overlay": str(edir / "overlay.png") if (edir / "overlay.png").exists() else "", "segmap": str(edir / "segmap.png") if (edir / "segmap.png").exists() else "", }, "image_b64": {}, } data["biomedclip"]["all_cross_modal_pairs"] = _build_all_cross_modal_pairs( data["biomedclip"], mobius_sidecar=bc_mobius, method=method, ) rename_cross_pair_patch_labels_in_place(data["biomedclip"]["all_cross_modal_pairs"]) for key in ("original", "overlay", "segmap"): fpath = edir / f"{key}.png" if fpath.exists(): import base64 with open(fpath, "rb") as f: data["biomedclip"].setdefault("image_b64", {})[key] = base64.b64encode(f.read()).decode("ascii") # ── VLM (Qwen2-VL) logprob + gen ──────────────────────────────── vlm_dir = _resolve_isic_results_dir() if vlm_dir: edir = vlm_dir / example_id for prefix, flag_key, json_key in [ ("vllm_logprob", "has_vllm_logprob", "vllm_logprob"), ("vllm_gen", "has_vllm_gen", "vllm_gen"), ]: summary_path = edir / f"{prefix}_summary.txt" if summary_path.exists(): parsed = parse_vllm_summary(summary_path) if parsed: json_data = load_vllm_result_json(edir, prefix, method=method) apply_method_to_vllm_summary(parsed, json_data, method) data[flag_key] = True data[json_key] = parsed data[f"{json_key}_json"] = json_data data["has_mobius"][json_key] = bool(json_data.get("mobius_dict")) overlay = edir / f"{prefix}_overlay.png" if overlay.exists(): data[json_key]["overlay_path"] = str(overlay) # ── LLaVA-Med logprob + gen ────────────────────────────────────── lm_dir = _resolve_isic_results_dir("_llavamed") if lm_dir: edir = lm_dir / example_id for prefix, flag_key, json_key in [ ("vllm_logprob", "has_llavamed_logprob", "llavamed_logprob"), ("vllm_gen", "has_llavamed_gen", "llavamed_gen"), ]: summary_path = edir / f"{prefix}_summary.txt" if summary_path.exists(): parsed = parse_vllm_summary(summary_path) if parsed: json_data = load_vllm_result_json(edir, prefix, method=method) apply_method_to_vllm_summary(parsed, json_data, method) data[flag_key] = True data[json_key] = parsed data[f"{json_key}_json"] = json_data data["has_mobius"][json_key] = bool(json_data.get("mobius_dict")) overlay = edir / f"{prefix}_overlay.png" if overlay.exists(): data[json_key]["overlay_path"] = str(overlay) # ── VLM UnSAM ──────────────────────────────────────────────────── vu_dir = _resolve_isic_results_dir("_vlm_unsam") if vu_dir: edir = vu_dir / example_id for prefix, flag_key, json_key in [ ("vllm_logprob", "has_vlm_unsam_logprob", "vlm_unsam_logprob"), ("vllm_gen", "has_vlm_unsam_gen", "vlm_unsam_gen"), ]: summary_path = edir / f"{prefix}_summary.txt" if summary_path.exists(): parsed = parse_vllm_summary(summary_path) if parsed: json_data = load_vllm_result_json(edir, prefix, method=method) apply_method_to_vllm_summary(parsed, json_data, method) data[flag_key] = True data[json_key] = parsed data[f"{json_key}_json"] = json_data data["has_mobius"][json_key] = bool(json_data.get("mobius_dict")) overlay = edir / f"{prefix}_overlay.png" if overlay.exists(): data[json_key]["overlay_path"] = str(overlay) segmap = edir / "segmap.png" original = edir / "original.png" if segmap.exists(): data["vlm_unsam_segmap_path"] = str(segmap) if original.exists(): data["vlm_unsam_original_path"] = str(original) # ── LLaVA-Med UnSAM ───────────────────────────────────────────── lu_dir = _resolve_isic_results_dir("_llavamed_unsam") if lu_dir: edir = lu_dir / example_id for prefix, flag_key, json_key in [ ("vllm_logprob", "has_llavamed_unsam_logprob", "llavamed_unsam_logprob"), ("vllm_gen", "has_llavamed_unsam_gen", "llavamed_unsam_gen"), ]: summary_path = edir / f"{prefix}_summary.txt" if summary_path.exists(): parsed = parse_vllm_summary(summary_path) if parsed: json_data = load_vllm_result_json(edir, prefix, method=method) apply_method_to_vllm_summary(parsed, json_data, method) data[flag_key] = True data[json_key] = parsed data[f"{json_key}_json"] = json_data data["has_mobius"][json_key] = bool(json_data.get("mobius_dict")) overlay = edir / f"{prefix}_overlay.png" if overlay.exists(): data[json_key]["overlay_path"] = str(overlay) segmap = edir / "segmap.png" original = edir / "original.png" if segmap.exists(): data["llavamed_unsam_segmap_path"] = str(segmap) if original.exists(): data["llavamed_unsam_original_path"] = str(original) return data