Spaces:
Sleeping
Sleeping
File size: 8,791 Bytes
a51a1a7 2d788b3 a51a1a7 2d788b3 a51a1a7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 | """Generalisation Evaluation — Stage 6 of the Generalisation pipeline."""
import streamlit as st
import cv2
import numpy as np
import plotly.graph_objects as go
import plotly.figure_factory as ff
from src.models import BACKBONES
def _iou(a, b):
    """Return the intersection-over-union of two corner-format boxes.

    Both boxes are indexed as ``(x0, y0, x1, y1)``; only the first four
    entries are read. A small epsilon in the denominator avoids division by
    zero, so two identical boxes score marginally below 1.0.
    """
    # Overlap extent along each axis, clamped to zero when the boxes are apart.
    overlap_w = max(0, min(a[2], b[2]) - max(a[0], b[0]))
    overlap_h = max(0, min(a[3], b[3]) - max(a[1], b[1]))
    intersection = overlap_w * overlap_h

    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return intersection / (area_a + area_b - intersection + 1e-6)
def match_detections(dets, gt_list, iou_thr):
    """Greedily pair detections with ground-truth boxes by IoU.

    Detections are visited in descending order of ``det[5]`` and each one
    claims the still-unclaimed ground-truth box it overlaps most, provided
    that overlap reaches ``iou_thr``. Labels are not compared here.

    Args:
        dets: Sequences whose first four items are the box and whose item 5
            is the sort key (the confidence score).
        gt_list: ``(box, label)`` pairs.
        iou_thr: Minimum IoU for a pairing to count.

    Returns:
        A 3-tuple ``(results, n_unmatched_gt, matched_gt_indices)`` where
        ``results`` holds one ``(det, gt_label_or_None, best_iou)`` entry per
        detection, in the visiting order.
    """
    claimed = set()
    outcomes = []
    for det in sorted(dets, key=lambda d: d[5], reverse=True):
        box = det[:4]
        top_iou, top_idx, top_label = 0.0, -1, None
        for idx, (gt_box, gt_label) in enumerate(gt_list):
            if idx not in claimed:
                overlap = _iou(box, gt_box)
                # Strict ">" keeps the earliest ground truth on ties.
                if overlap > top_iou:
                    top_iou, top_idx, top_label = overlap, idx, gt_label

        is_hit = top_idx >= 0 and top_iou >= iou_thr
        if is_hit:
            claimed.add(top_idx)
        outcomes.append((det, top_label if is_hit else None, top_iou))

    return outcomes, len(gt_list) - len(claimed), claimed
def compute_pr_curve(dets, gt_list, iou_thr, steps=50):
    """Sweep a confidence cut-off and collect precision, recall and F1.

    Args:
        dets: Detections; item 5 of each is compared against the cut-off.
        gt_list: ``(box, label)`` ground-truth pairs.
        iou_thr: IoU threshold forwarded to ``match_detections``.
        steps: Number of evenly spaced cut-offs between 0.0 and 1.0.

    Returns:
        Four lists ``(thresholds, precisions, recalls, f1s)``; all four are
        empty when ``dets`` is empty.
    """
    if not dets:
        return [], [], [], []

    thresholds = np.linspace(0.0, 1.0, steps)
    curve = []  # one (precision, recall, f1) triple per cut-off
    for cutoff in thresholds:
        kept = [d for d in dets if d[5] >= cutoff]
        if not kept:
            # No detections survive: precision is taken as 1, recall as 0.
            curve.append((1.0, 0.0, 0.0))
            continue

        outcomes, fn, _ = match_detections(kept, gt_list, iou_thr)
        tp = sum(1 for _, gt_lbl, _ in outcomes if gt_lbl is not None)
        fp = sum(1 for _, gt_lbl, _ in outcomes if gt_lbl is None)

        prec = tp / (tp + fp) if (tp + fp) > 0 else 1.0
        rec = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        if (prec + rec) > 0:
            f1 = 2 * prec * rec / (prec + rec)
        else:
            f1 = 0.0
        curve.append((prec, rec, f1))

    precisions = [point[0] for point in curve]
    recalls = [point[1] for point in curve]
    f1s = [point[2] for point in curve]
    return thresholds.tolist(), precisions, recalls, f1s
def build_confusion_matrix(dets, gt_list, iou_thr):
    """Build a predicted-by-actual confusion matrix with a background class.

    Rows are predicted labels and columns are actual labels. The label set is
    the sorted ground-truth labels followed by ``"background"``; a predicted
    label outside that set is counted in the background row.

    Args:
        dets: Detections; item 4 of each is read as the predicted label.
        gt_list: ``(box, label)`` ground-truth pairs.
        iou_thr: IoU threshold forwarded to ``match_detections``.

    Returns:
        ``(matrix, all_labels)`` where ``matrix`` is a square integer NumPy
        array indexed in the order of ``all_labels``.
    """
    all_labels = sorted({lbl for _, lbl in gt_list}) + ["background"]
    size = len(all_labels)
    label_to_idx = dict(zip(all_labels, range(size)))
    bg = label_to_idx["background"]
    matrix = np.zeros((size, size), dtype=int)

    matched, _, matched_gt_indices = match_detections(dets, gt_list, iou_thr)

    # Every detection lands in its predicted row; the column is the matched
    # ground-truth label, or background when it matched nothing.
    for det, gt_lbl, _ in matched:
        row = label_to_idx.get(det[4], bg)
        col = bg if gt_lbl is None else label_to_idx[gt_lbl]
        matrix[row][col] += 1

    # Ground truths nobody claimed count as "predicted background".
    for gt_idx, (_, gt_lbl) in enumerate(gt_list):
        if gt_idx not in matched_gt_indices:
            matrix[bg][label_to_idx[gt_lbl]] += 1

    return matrix, all_labels
def _eleven_point_ap(precisions, recalls):
    """Return the 11-point interpolated average precision of a PR curve."""
    if not recalls or not precisions:
        return 0.0
    total = 0.0
    for step in range(11):
        # Use step / 10 instead of np.arange(0.0, 1.1, 0.1): the latter yields
        # values such as 0.30000000000000004 and 0.6000000000000001, so a
        # recall of exactly 3/10 or 3/5 would fail the ">=" test below.
        level = step / 10
        reachable = [p for p, r in zip(precisions, recalls) if r >= level]
        total += max(reachable) if reachable else 0.0
    return total / 11.0


def render():
    """Render the evaluation page: ground truth, confusion matrices, PR curves.

    Reads the ``"gen_pipeline"`` entry of ``st.session_state``. From it the
    page uses ``rois`` (or a single fallback ROI built from ``crop`` /
    ``crop_bbox`` / ``crop_aug``), ``test_image``, and the per-method
    detection lists ``rce_dets``, ``cnn_dets`` and ``orb_dets``. The page
    stops early with a message when the pipeline, the ground truth, or all
    detection lists are missing.
    """
    # NOTE(review): the leading symbols in the headings below look like
    # mis-encoded emoji; they are kept verbatim until the intended glyphs
    # are confirmed.
    st.title("π Evaluation: Confusion Matrix & PR Curves")

    pipe = st.session_state.get("gen_pipeline")
    if not pipe:
        st.error("Complete the **Data Lab** first.")
        st.stop()

    crop = pipe.get("crop")
    crop_aug = pipe.get("crop_aug", crop)
    rois = pipe.get("rois")
    if rois is None:
        if crop is None:
            # Without ROIs or a crop there is no ground truth to evaluate
            # against; previously this fell through with bbox=None and
            # crashed while drawing the boxes.
            st.error("Complete the **Data Lab** first.")
            st.stop()
        bbox = pipe.get("crop_bbox", (0, 0, crop.shape[1], crop.shape[0]))
        rois = [{"label": "object", "bbox": bbox,
                 "crop": crop, "crop_aug": crop_aug}]

    rce_dets = pipe.get("rce_dets")
    cnn_dets = pipe.get("cnn_dets")
    orb_dets = pipe.get("orb_dets")
    if rce_dets is None and cnn_dets is None and orb_dets is None:
        st.warning("Run detection first in **Real-Time Detection**.")
        st.stop()

    gt_boxes = [(roi["bbox"], roi["label"]) for roi in rois]

    st.sidebar.subheader("Evaluation Settings")
    iou_thresh = st.sidebar.slider("IoU Threshold", 0.1, 0.9, 0.5, 0.05,
                                   help="Minimum IoU to count as TP",
                                   key="gen_eval_iou")

    st.subheader("Ground Truth (from Data Lab ROIs)")
    st.caption(f"{len(gt_boxes)} ground-truth ROIs defined")
    # Draw on a copy so the stored test image stays untouched.
    gt_vis = pipe["test_image"].copy()
    for (bx0, by0, bx1, by1), lbl in gt_boxes:
        cv2.rectangle(gt_vis, (bx0, by0), (bx1, by1), (0, 255, 255), 2)
        cv2.putText(gt_vis, lbl, (bx0, by0 - 6),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)
    st.image(cv2.cvtColor(gt_vis, cv2.COLOR_BGR2RGB),
             caption="Ground Truth Annotations", use_container_width=True)
    st.divider()

    # Only methods that actually produced a detection list are evaluated.
    methods = {}
    if rce_dets is not None:
        methods["RCE"] = rce_dets
    if cnn_dets is not None:
        methods["CNN"] = cnn_dets
    if orb_dets is not None:
        methods["ORB"] = orb_dets

    # Confusion Matrices
    st.subheader("π² Confusion Matrices")
    cm_cols = st.columns(len(methods))
    for col, (name, dets) in zip(cm_cols, methods.items()):
        with col:
            st.markdown(f"**{name}**")
            matrix, labels = build_confusion_matrix(dets, gt_boxes, iou_thresh)
            fig_cm = ff.create_annotated_heatmap(
                z=matrix.tolist(), x=labels, y=labels,
                colorscale="Blues", showscale=True)
            fig_cm.update_layout(title=f"{name} Confusion Matrix",
                                 xaxis_title="Actual", yaxis_title="Predicted",
                                 template="plotly_dark", height=350)
            fig_cm.update_yaxes(autorange="reversed")
            st.plotly_chart(fig_cm, use_container_width=True)

            # Headline metrics at the selected IoU, over all detections.
            matched, n_missed, _ = match_detections(dets, gt_boxes, iou_thresh)
            tp = sum(1 for _, g, _ in matched if g is not None)
            fp = sum(1 for _, g, _ in matched if g is None)
            fn = n_missed
            prec = tp / (tp + fp) if (tp + fp) > 0 else 0.0
            rec = tp / (tp + fn) if (tp + fn) > 0 else 0.0
            f1 = 2 * prec * rec / (prec + rec) if (prec + rec) > 0 else 0.0
            m1, m2, m3 = st.columns(3)
            m1.metric("Precision", f"{prec:.1%}")
            m2.metric("Recall", f"{rec:.1%}")
            m3.metric("F1 Score", f"{f1:.1%}")

    # PR Curves
    st.divider()
    st.subheader("π Precision-Recall Curves")
    method_colors = {"RCE": "#00ff88", "CNN": "#4488ff", "ORB": "#ff8800"}
    fig_pr = go.Figure()
    fig_f1 = go.Figure()
    summary_rows = []
    for name, dets in methods.items():
        thrs, precs, recs, f1s = compute_pr_curve(dets, gt_boxes, iou_thresh)
        clr = method_colors.get(name, "#ffffff")
        fig_pr.add_trace(go.Scatter(
            x=recs, y=precs, mode="lines+markers",
            name=name, line=dict(color=clr, width=2), marker=dict(size=4)))
        fig_f1.add_trace(go.Scatter(
            x=thrs, y=f1s, mode="lines",
            name=name, line=dict(color=clr, width=2)))

        # 11-point interpolated AP (VOC standard)
        ap = _eleven_point_ap(precs, recs)

        best_f1_idx = int(np.argmax(f1s)) if f1s else 0
        summary_rows.append({
            "Method": name,
            "AP": f"{ap:.3f}",
            "Best F1": f"{f1s[best_f1_idx]:.3f}" if f1s else "N/A",
            "@ Threshold": f"{thrs[best_f1_idx]:.2f}" if thrs else "N/A",
            "Detections": len(dets),
        })

    fig_pr.update_layout(title="Precision vs Recall",
                         xaxis_title="Recall", yaxis_title="Precision",
                         template="plotly_dark", height=400,
                         xaxis=dict(range=[0, 1.05]), yaxis=dict(range=[0, 1.05]))
    fig_f1.update_layout(title="F1 Score vs Confidence Threshold",
                         xaxis_title="Confidence Threshold", yaxis_title="F1 Score",
                         template="plotly_dark", height=400,
                         xaxis=dict(range=[0, 1.05]), yaxis=dict(range=[0, 1.05]))
    pc1, pc2 = st.columns(2)
    pc1.plotly_chart(fig_pr, use_container_width=True)
    pc2.plotly_chart(fig_f1, use_container_width=True)

    # Summary Table
    st.divider()
    st.subheader("π Summary")
    import pandas as pd
    st.dataframe(pd.DataFrame(summary_rows), use_container_width=True, hide_index=True)
    st.caption(f"All metrics computed at IoU threshold = **{iou_thresh:.2f}**.")
|