"""Find best and worst predictions by per-sample IoU and generate showcase figures."""

import json
from pathlib import Path

import matplotlib

matplotlib.use("Agg")  # headless backend: must be set before pyplot import
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from tqdm import tqdm

PROJECT_ROOT = Path(__file__).resolve().parents[1]


def iou(pred: np.ndarray, gt: np.ndarray) -> float:
    """Return the intersection-over-union of two binary masks.

    Args:
        pred: Binary prediction mask (nonzero = positive).
        gt: Binary ground-truth mask, same shape as ``pred``.

    Returns:
        IoU in [0, 1]; 0.0 when both masks are empty (union == 0).
    """
    intersection = np.logical_and(pred, gt).sum()
    union = np.logical_or(pred, gt).sum()
    return float(intersection / union) if union > 0 else 0.0


def score_all():
    """Score every test prediction against ground truth.

    For each test sample, every candidate mask (one per prompt) found in
    ``outputs/masks`` is compared to the ground truth and only the
    best-scoring candidate is recorded.

    Returns:
        Dict mapping dataset name ("taping" / "cracks") to a list of score
        records: image/mask/pred paths, the winning prompt, IoU, and dataset.
    """
    with open(PROJECT_ROOT / "data" / "splits" / "test.json") as f:
        test_samples = json.load(f)

    masks_dir = PROJECT_ROOT / "outputs" / "masks"
    scores = {"taping": [], "cracks": []}

    for sample in tqdm(test_samples, desc="Scoring predictions"):
        img_stem = Path(sample["image_path"]).stem
        ds = sample["dataset"]
        # Predictions are saved as "<image-stem>__<prompt_with_underscores>.png".
        candidates = list(masks_dir.glob(f"{img_stem}__*.png"))
        if not candidates:
            continue  # no prediction produced for this image

        gt = np.array(Image.open(sample["mask_path"]).convert("L"))
        gt_bin = (gt > 127).astype(np.uint8)

        best_iou = -1.0  # float sentinel; any real score (>= 0) beats it
        best_pred_path = None
        best_prompt = None
        for pred_path in candidates:
            # Resize prediction to GT resolution; NEAREST keeps the mask binary.
            pred = np.array(
                Image.open(pred_path).convert("L").resize(
                    (gt.shape[1], gt.shape[0]), Image.NEAREST
                )
            )
            pred_bin = (pred > 127).astype(np.uint8)
            score = iou(pred_bin, gt_bin)
            if score > best_iou:
                best_iou = score
                best_pred_path = pred_path
                # Recover the human-readable prompt from the filename suffix.
                best_prompt = pred_path.stem.split("__")[1].replace("_", " ")

        scores[ds].append({
            "image_path": sample["image_path"],
            "mask_path": sample["mask_path"],
            "pred_path": str(best_pred_path),
            "prompt": best_prompt,
            "iou": best_iou,
            "dataset": ds,
        })
    return scores


def pick_ranked(scores, n_per_class=3, best=True):
    """Pick top-N (``best=True``) or bottom-N (``best=False``) per class by IoU.

    Args:
        scores: Per-class score lists as returned by ``score_all``.
        n_per_class: Number of samples to keep per dataset.
        best: Select highest IoU when True, lowest when False.

    Returns:
        Flat list of selected score records (cracks first, then taping).
    """
    result = []
    for ds in ["cracks", "taping"]:
        # For the "worst" view, drop zero-IoU entries (no overlap at all,
        # typically missing predictions) so only genuine failures are shown.
        pool = [s for s in scores[ds] if s["iou"] > 0] if not best else scores[ds]
        ranked = sorted(pool, key=lambda x: x["iou"], reverse=best)
        selected = ranked[:n_per_class]
        result.extend(selected)
        label = "best" if best else "worst"
        print(f"\n{ds} {label} {n_per_class}:")
        for r in selected:
            print(f" IoU={r['iou']:.4f} {Path(r['image_path']).name} \"{r['prompt']}\"")
    return result


def generate_grid(examples, output_path, title=""):
    """Generate an original | ground truth | prediction comparison grid.

    Args:
        examples: Non-empty list of score records (see ``pick_ranked``).
        output_path: Destination PNG path; parent directories are created.
        title: Optional figure-level title.

    Raises:
        ValueError: If ``examples`` is empty (``plt.subplots(0, 3)`` would
            otherwise fail with an opaque error).
    """
    if not examples:
        raise ValueError("generate_grid requires at least one example")
    output_path = Path(output_path)
    # savefig raises FileNotFoundError if the directory does not exist yet.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    n = len(examples)
    fig, axes = plt.subplots(n, 3, figsize=(14, 4.0 * n))
    if n == 1:
        axes = [axes]  # normalize so axes[i][j] indexing works for a single row
    if title:
        fig.suptitle(title, fontsize=16, fontweight="bold", y=0.998)

    for i, ex in enumerate(examples):
        img = Image.open(ex["image_path"]).convert("RGB")
        gt = Image.open(ex["mask_path"]).convert("L")
        # Match prediction to GT size; NEAREST avoids interpolated gray values.
        pred = Image.open(ex["pred_path"]).convert("L").resize(
            (gt.size[0], gt.size[1]), Image.NEAREST
        )
        label = ex["dataset"].capitalize()

        axes[i][0].imshow(img)
        axes[i][0].set_title(f"Input — {label}", fontsize=11, fontweight="bold")
        axes[i][0].axis("off")

        axes[i][1].imshow(gt, cmap="gray", vmin=0, vmax=255)
        axes[i][1].set_title("Ground Truth", fontsize=11)
        axes[i][1].axis("off")

        axes[i][2].imshow(pred, cmap="gray", vmin=0, vmax=255)
        axes[i][2].set_title(
            f"Predicted — \"{ex['prompt']}\" (IoU {ex['iou']:.2f})", fontsize=11
        )
        axes[i][2].axis("off")

    plt.tight_layout()
    plt.savefig(output_path, dpi=150, bbox_inches="tight", facecolor="white")
    plt.close(fig)  # close this figure explicitly, not whatever is "current"
    print(f"Saved → {output_path}")


if __name__ == "__main__":
    figures_dir = PROJECT_ROOT / "reports" / "figures"

    scores = score_all()

    # Best predictions (3 per class)
    best = pick_ranked(scores, n_per_class=3, best=True)
    generate_grid(best, figures_dir / "best_predictions.png",
                  title="Best Test-Set Predictions (by IoU)")

    # Worst predictions (3 per class) — only samples where the model actually
    # predicted something (zero-IoU entries are filtered inside pick_ranked)
    worst = pick_ranked(scores, n_per_class=3, best=False)
    generate_grid(worst, figures_dir / "failure_cases.png",
                  title="Failure Cases — Worst Test-Set Predictions (by IoU)")