# drywall-qa-clipseg / src/best_predictions.py
"""Find best and worst predictions by per-sample IoU and generate showcase figures."""
import json
from pathlib import Path
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from tqdm import tqdm
# Repository root: two levels up from this file (src/<name>.py -> project root).
PROJECT_ROOT = Path(__file__).resolve().parents[1]
def iou(pred: np.ndarray, gt: np.ndarray) -> float:
    """Compute intersection-over-union between two binary masks.

    Returns 0.0 when both masks are empty (union == 0) rather than dividing
    by zero.
    """
    overlap = np.count_nonzero(np.logical_and(pred, gt))
    combined = np.count_nonzero(np.logical_or(pred, gt))
    if combined == 0:
        return 0.0
    return float(overlap / combined)
def score_all():
    """Score every test prediction against its ground-truth mask.

    Reads the test split, and for each sample compares every predicted mask
    (one per prompt, stored as ``<image_stem>__<prompt_with_underscores>.png``
    in ``outputs/masks``) against the ground truth, keeping the best-scoring
    prompt per sample.

    Returns:
        dict: per-class ("taping" / "cracks") lists of records, each holding
        the image/mask/prediction paths, the winning prompt, its IoU, and
        the dataset name.
    """
    with open(PROJECT_ROOT / "data" / "splits" / "test.json") as f:
        test_samples = json.load(f)
    masks_dir = PROJECT_ROOT / "outputs" / "masks"
    scores = {"taping": [], "cracks": []}
    for sample in tqdm(test_samples, desc="Scoring predictions"):
        img_stem = Path(sample["image_path"]).stem
        # NOTE(review): assumes sample["dataset"] is always "taping" or
        # "cracks" — any other value raises KeyError below.
        ds = sample["dataset"]
        candidates = list(masks_dir.glob(f"{img_stem}__*.png"))
        if not candidates:
            # No prediction was generated for this sample — skip it.
            continue
        # Context manager closes the file handle; bare Image.open leaks
        # handles across the whole test set because PIL loads lazily.
        with Image.open(sample["mask_path"]) as gt_img:
            gt = np.array(gt_img.convert("L"))
        gt_bin = (gt > 127).astype(np.uint8)
        best_iou = -1.0
        best_pred_path = None
        best_prompt = None
        for pred_path in candidates:
            with Image.open(pred_path) as pred_img:
                # Nearest-neighbor resize to GT resolution keeps the mask
                # binary (no interpolated gray values).
                pred = np.array(pred_img.convert("L").resize(
                    (gt.shape[1], gt.shape[0]), Image.NEAREST))
            pred_bin = (pred > 127).astype(np.uint8)
            score = iou(pred_bin, gt_bin)
            if score > best_iou:
                best_iou = score
                best_pred_path = pred_path
                # Recover the prompt text from the filename convention
                # <stem>__<prompt_with_underscores>.png
                best_prompt = pred_path.stem.split("__")[1].replace("_", " ")
        scores[ds].append({
            "image_path": sample["image_path"],
            "mask_path": sample["mask_path"],
            "pred_path": str(best_pred_path),
            "prompt": best_prompt,
            "iou": best_iou,
            "dataset": ds,
        })
    return scores
def pick_ranked(scores, n_per_class=3, best=True):
    """Select the top-N (best=True) or bottom-N (best=False) samples per class by IoU.

    When picking the worst, zero-IoU entries are dropped first so that only
    genuine mispredictions (not missing predictions) are reported.
    """
    picked = []
    label = "best" if best else "worst"
    for ds in ["cracks", "taping"]:
        if best:
            pool = scores[ds]
        else:
            # Keep only samples where the model actually predicted something.
            pool = [entry for entry in scores[ds] if entry["iou"] > 0]
        chosen = sorted(pool, key=lambda entry: entry["iou"], reverse=best)[:n_per_class]
        picked.extend(chosen)
        print(f"\n{ds} {label} {n_per_class}:")
        for entry in chosen:
            print(f" IoU={entry['iou']:.4f} {Path(entry['image_path']).name} \"{entry['prompt']}\"")
    return picked
def generate_grid(examples, output_path, title=""):
    """Render an original | ground-truth | prediction comparison grid as a PNG.

    Args:
        examples: score records (as produced by ``score_all``) carrying
            image/mask/prediction paths, prompt, IoU, and dataset name.
        output_path: destination path for the rendered figure.
        title: optional figure super-title.
    """
    n = len(examples)
    # squeeze=False keeps axes 2-D even for a single row, so axes[i][j]
    # indexing is uniform (replaces the fragile manual wrapping for n == 1).
    fig, axes = plt.subplots(n, 3, figsize=(14, 4.0 * n), squeeze=False)
    if title:
        fig.suptitle(title, fontsize=16, fontweight="bold", y=0.998)
    for i, ex in enumerate(examples):
        # Context managers close the file handles; PIL's lazy loading
        # otherwise leaves one open handle per image.
        with Image.open(ex["image_path"]) as img_f:
            img = img_f.convert("RGB")
        with Image.open(ex["mask_path"]) as gt_f:
            gt = gt_f.convert("L")
        with Image.open(ex["pred_path"]) as pred_f:
            # Nearest-neighbor resize to GT resolution keeps the mask binary.
            pred = pred_f.convert("L").resize(
                (gt.size[0], gt.size[1]), Image.NEAREST)
        label = ex["dataset"].capitalize()
        axes[i][0].imshow(img)
        axes[i][0].set_title(f"Input — {label}", fontsize=11, fontweight="bold")
        axes[i][0].axis("off")
        axes[i][1].imshow(gt, cmap="gray", vmin=0, vmax=255)
        axes[i][1].set_title("Ground Truth", fontsize=11)
        axes[i][1].axis("off")
        axes[i][2].imshow(pred, cmap="gray", vmin=0, vmax=255)
        axes[i][2].set_title(f"Predicted — \"{ex['prompt']}\" (IoU {ex['iou']:.2f})", fontsize=11)
        axes[i][2].axis("off")
    plt.tight_layout()
    plt.savefig(output_path, dpi=150, bbox_inches="tight", facecolor="white")
    # Close this specific figure rather than whatever is "current".
    plt.close(fig)
    print(f"Saved → {output_path}")
if __name__ == "__main__":
    figures_dir = PROJECT_ROOT / "reports" / "figures"
    # savefig does not create missing directories — ensure the output
    # directory exists so the script works on a fresh checkout.
    figures_dir.mkdir(parents=True, exist_ok=True)
    scores = score_all()
    # Best predictions (3 per class).
    best = pick_ranked(scores, n_per_class=3, best=True)
    generate_grid(best, figures_dir / "best_predictions.png",
                  title="Best Test-Set Predictions (by IoU)")
    # Worst predictions (3 per class) — only samples where the model
    # actually predicted something.
    worst = pick_ranked(scores, n_per_class=3, best=False)
    generate_grid(worst, figures_dir / "failure_cases.png",
                  title="Failure Cases — Worst Test-Set Predictions (by IoU)")