ring-sizer / script /analyze_calibration.py
feng-x's picture
Upload folder using huggingface_hub
8bc7d2f verified
#!/usr/bin/env python3
"""Analysis & regression script for calibration dataset.
Performs:
1. px/cm stability analysis
2. A vs B repeatability
3. Ground truth sanity check (π×diameter vs circumference)
4. Scatter plot & bias analysis
5. Linear regression with leave-one-person-out cross-validation
"""
import json
import math
import os
from pathlib import Path
import numpy as np
# matplotlib is optional: plotting is simply skipped when it is not installed.
try:
    import matplotlib

    # Select the non-interactive Agg backend before pyplot is imported.
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
except ImportError:
    HAS_PLT = False
    print("Warning: matplotlib not available, skipping plots")
else:
    HAS_PLT = True
def load_results(path: str) -> list[dict]:
    """Read the UTF-8 JSON file at *path* and return its decoded content."""
    raw_text = Path(path).read_text(encoding="utf-8")
    return json.loads(raw_text)
def section(title: str):
    """Print *title* as a banner framed by two 60-character '=' rules.

    A blank line is emitted before the top rule so consecutive sections are
    visually separated in the console output.
    """
    rule = "=" * 60
    print("\n" + rule, f" {title}", rule, sep="\n")
def analyze_scale_stability(results: list[dict]):
    """Phase 3a: px/cm stability across all images.

    For every distinct ``image`` value, the first truthy
    ``cv_scale_px_per_cm`` encountered is kept.  Summary statistics and a
    per-image table (sorted by image name) are printed.

    Args:
        results: Records carrying at least the keys ``"image"`` and
            ``"cv_scale_px_per_cm"``.

    Returns:
        ``(mean, std)`` of the collected scales, or ``(nan, nan)`` when no
        record carries a usable scale.
    """
    section("1. Card Detection / px/cm Stability")
    scales = {}
    for r in results:
        img = r["image"]
        # Several records can share one image; only the first usable scale
        # per image is kept.
        if img not in scales and r["cv_scale_px_per_cm"]:
            scales[img] = r["cv_scale_px_per_cm"]

    if not scales:
        # min()/max() on an empty list would raise; report and bail out.
        print(" No images with a px/cm scale; skipping stability analysis")
        return float("nan"), float("nan")

    vals = list(scales.values())
    lo, hi = min(vals), max(vals)
    spread = hi - lo
    mean_s = np.mean(vals)
    std_s = np.std(vals)
    cv_pct = (std_s / mean_s) * 100  # coefficient of variation, in percent

    print(f" Images analyzed: {len(scales)}")
    print(f" Mean px/cm: {mean_s:.2f}")
    print(f" Std px/cm: {std_s:.2f}")
    print(f" CV%: {cv_pct:.2f}%")
    # The two bounds need a separator, otherwise they read as one number.
    print(f" Range: {lo:.2f} – {hi:.2f}")
    print(f" Max spread: {spread:.2f} px/cm ({spread/mean_s*100:.2f}%)")

    # Per-image table
    print(f"\n {'Image':<16} {'px/cm':>8} {'Δ from mean':>12}")
    print(f" {'-'*38}")
    for img, s in sorted(scales.items()):
        print(f" {img:<16} {s:>8.2f} {s-mean_s:>+12.2f}")
    return mean_s, std_s
def analyze_repeatability(results: list[dict]):
    """Phase 3b: compare the CV diameter of shot A against shot B.

    Records are grouped by ``(person, finger_en)``; within a group the
    ``cv_diameter_cm`` is stored per ``shot`` label (a later record with the
    same label overwrites an earlier one).  Groups where either the "A" or
    the "B" value is missing or falsy are left out of the table.

    Returns:
        The list of absolute A/B differences (cm), one per printed row.
    """
    section("2. A vs B Repeatability")

    # (person, finger) -> {shot label: measured diameter}
    by_finger = {}
    for rec in results:
        finger_key = (rec["person"], rec["finger_en"])
        by_finger.setdefault(finger_key, {})[rec["shot"]] = rec["cv_diameter_cm"]

    abs_diffs = []
    print(f" {'Person':<10} {'Finger':<8} {'Shot A':>8} {'Shot B':>8} {'Δ(B-A)':>8} {'%diff':>7}")
    print(f" {'-'*53}")
    for (person, finger), shots in sorted(by_finger.items()):
        a = shots.get("A")
        b = shots.get("B")
        if not a or not b:
            continue
        d = b - a
        # Percent difference relative to the mean of the two shots.
        pct = abs(d) / ((a + b) / 2) * 100
        abs_diffs.append(abs(d))
        print(f" {person:<10} {finger:<8} {a:>8.3f} {b:>8.3f} {d:>+8.3f} {pct:>6.1f}%")

    if abs_diffs:
        print(f"\n Mean |A-B| difference: {np.mean(abs_diffs):.4f} cm")
        print(f" Max |A-B| difference: {max(abs_diffs):.4f} cm")
        print(f" Std |A-B| difference: {np.std(abs_diffs):.4f} cm")
        print(f" 95th percentile: {np.percentile(abs_diffs, 95):.4f} cm")
    return abs_diffs
def check_ground_truth_sanity(results: list[dict]):
    """Phase 3c: compare ground-truth circumference with π × diameter.

    Only the first record of each ``(person, finger_cn)`` pair is examined;
    it is reported when both ``gt_diameter_cm`` and ``gt_circumference_cm``
    are truthy.  Prints one table row per reported pair and, if any row was
    printed, the mean of (circumference − π × diameter).
    """
    section("3. Ground Truth Sanity (π×diameter vs circumference)")
    visited = set()
    gaps = []
    print(f" {'Person':<10} {'Finger':<6} {'Diam':>6} {'Circ':>6} {'π×D':>6} {'Δ':>7} {'%err':>6}")
    print(f" {'-'*55}")
    for rec in results:
        pair = (rec["person"], rec["finger_cn"])
        if pair in visited:
            continue
        # Mark the pair before validating, so later records of the same
        # pair are never consulted even if this one lacks data.
        visited.add(pair)
        d = rec["gt_diameter_cm"]
        c = rec["gt_circumference_cm"]
        if not d or not c:
            continue
        pi_d = math.pi * d
        diff = c - pi_d
        pct = diff / c * 100
        gaps.append(diff)
        print(f" {rec['person']:<10} {rec['finger_cn']:<6} {d:>6.2f} {c:>6.1f} {pi_d:>6.2f} {diff:>+7.2f} {pct:>+5.1f}%")

    if not gaps:
        return
    print(f"\n Mean (circ - π×diam): {np.mean(gaps):+.3f} cm")
    print(f" This is expected: circumference > π×diameter because")
    print(f" fingers are not perfect circles (slightly oval/flattened).")
def bias_analysis(results: list[dict]):
    """Phase 4a: accuracy and bias of CV diameters against ground truth.

    Uses every record whose ``cv_diameter_cm`` and ``gt_diameter_cm`` are
    both truthy and prints error statistics (mean, median, std, mean
    percent error, MAE, max, RMSE) plus the Pearson correlation.

    Args:
        results: Records carrying ``"cv_diameter_cm"`` and
            ``"gt_diameter_cm"``.

    Returns:
        ``(cv, gt, errors)`` as NumPy arrays, where ``errors = cv - gt``.

    Raises:
        ValueError: If no record has both diameters, since none of the
            statistics are defined for an empty sample.
    """
    section("4. Accuracy & Bias Analysis")
    valid = [r for r in results if r["cv_diameter_cm"] and r["gt_diameter_cm"]]
    if not valid:
        # Fail up front with a clear message instead of printing NaN rows
        # and then hitting an opaque NumPy reduction error.
        raise ValueError(
            "bias_analysis: no records with both cv_diameter_cm and "
            "gt_diameter_cm"
        )

    cv = np.array([r["cv_diameter_cm"] for r in valid])
    gt = np.array([r["gt_diameter_cm"] for r in valid])
    errors = cv - gt
    abs_errors = np.abs(errors)
    pct_errors = errors / gt * 100

    print(f" N = {len(valid)} measurements")
    print(f" Mean error (CV-GT): {np.mean(errors):+.4f} cm")
    print(f" Median error: {np.median(errors):+.4f} cm")
    print(f" Std of error: {np.std(errors):.4f} cm")
    print(f" Mean % error: {np.mean(pct_errors):+.1f}%")
    print(f" MAE (absolute): {np.mean(abs_errors):.4f} cm")
    print(f" Max error: {np.max(abs_errors):.4f} cm")
    print(f" RMSE: {np.sqrt(np.mean(errors**2)):.4f} cm")

    # Correlation
    corr = np.corrcoef(cv, gt)[0, 1]
    print(f" Pearson r: {corr:.4f}")
    print(f" R²: {corr**2:.4f}")
    return cv, gt, errors
def _fit_line(x: np.ndarray, y: np.ndarray):
    """Return the least-squares ``(slope, intercept)`` of ``y = slope*x + intercept``."""
    design = np.vstack([x, np.ones(len(x))]).T
    (slope, intercept), _, _, _ = np.linalg.lstsq(design, y, rcond=None)
    return slope, intercept


def linear_regression(cv: np.ndarray, gt: np.ndarray, results: list[dict]):
    """Phase 4b: OLS calibration fit plus leave-one-person-out validation.

    First fits ``gt = a * cv + b`` on all supplied points and prints the
    post-calibration error statistics.  Then, for each person found in
    ``results``, refits on everyone else's records and evaluates on the
    held-out person's records, printing a per-person row and pooled results.

    Args:
        cv: Measured diameters used for the full fit.
        gt: Ground-truth diameters, aligned with ``cv``.
        results: Raw records; those with truthy ``cv_diameter_cm`` and
            ``gt_diameter_cm`` are used for the cross-validation folds.

    Returns:
        ``(a, b)``: slope and intercept of the fit on all points.
    """
    section("5. Linear Regression Calibration")
    # Fit: gt = a * cv + b
    a, b = _fit_line(cv, gt)
    calibrated = a * cv + b
    cal_errors = calibrated - gt
    print(f" Model: actual = {a:.4f} × measured + {b:.4f}")
    print(f" (i.e., slope={a:.4f}, intercept={b:.4f})")
    print(f"\n After calibration:")
    print(f" Mean error: {np.mean(cal_errors):+.4f} cm")
    print(f" MAE: {np.mean(np.abs(cal_errors)):.4f} cm")
    print(f" Max error: {np.max(np.abs(cal_errors)):.4f} cm")
    print(f" RMSE: {np.sqrt(np.mean(cal_errors**2)):.4f} cm")

    # Leave-one-person-out cross-validation
    section("6. Leave-One-Person-Out Cross-Validation")
    # Filter once instead of re-scanning `results` for every fold.
    valid = [r for r in results if r["cv_diameter_cm"] and r["gt_diameter_cm"]]
    by_person = {}
    for r in valid:
        by_person.setdefault(r["person"], []).append(r)
    persons = sorted(by_person)

    if len(persons) < 2:
        # Holding out the only person would leave a fold with no training
        # data, so the per-fold fit would be meaningless.
        print(" Fewer than 2 persons with valid data; skipping cross-validation")
        return a, b

    all_cv_errors = []
    all_cv_cal_errors = []
    print(f" {'Person':<10} {'N':>3} {'a':>7} {'b':>7} {'MAE_raw':>8} {'MAE_cal':>8} {'Max_cal':>8}")
    print(f" {'-'*57}")
    for holdout in persons:
        # Train on all except holdout
        train = [r for r in valid if r["person"] != holdout]
        test = by_person[holdout]
        train_cv = np.array([r["cv_diameter_cm"] for r in train])
        train_gt = np.array([r["gt_diameter_cm"] for r in train])
        test_cv = np.array([r["cv_diameter_cm"] for r in test])
        test_gt = np.array([r["gt_diameter_cm"] for r in test])

        a_fold, b_fold = _fit_line(train_cv, train_gt)
        test_cal = a_fold * test_cv + b_fold
        raw_errors = np.abs(test_cv - test_gt)
        cal_errors_fold = np.abs(test_cal - test_gt)
        all_cv_errors.extend(raw_errors.tolist())
        all_cv_cal_errors.extend(cal_errors_fold.tolist())
        print(f" {holdout:<10} {len(test):>3} {a_fold:>7.4f} {b_fold:>+7.4f} "
              f"{np.mean(raw_errors):>8.4f} {np.mean(cal_errors_fold):>8.4f} "
              f"{np.max(cal_errors_fold):>8.4f}")

    all_cv_errors = np.array(all_cv_errors)
    all_cv_cal_errors = np.array(all_cv_cal_errors)
    raw_mae = np.mean(all_cv_errors)
    cal_mae = np.mean(all_cv_cal_errors)
    print(f"\n Cross-validated results (all holdout predictions):")
    print(f" Raw MAE: {raw_mae:.4f} cm")
    print(f" Cal MAE: {cal_mae:.4f} cm")
    print(f" Raw RMSE: {np.sqrt(np.mean(all_cv_errors**2)):.4f} cm")
    print(f" Cal RMSE: {np.sqrt(np.mean(all_cv_cal_errors**2)):.4f} cm")
    if raw_mae > 0:
        print(f" Improvement: {(1 - cal_mae/raw_mae)*100:.1f}% reduction in MAE")
    else:
        # A relative reduction is undefined when the raw error is already 0.
        print(" Improvement: n/a (raw MAE is zero)")
    return a, b
def generate_plots(cv: np.ndarray, gt: np.ndarray, a: float, b: float, out_dir: str):
    """Render the three-panel calibration figure and save it as a PNG.

    Panels, left to right: measured-vs-actual scatter with the identity line
    and the fitted line; histograms of raw and calibrated errors; calibrated
    residuals against calibrated diameter.  The image is written to
    ``<out_dir>/calibration_analysis.png``.  Does nothing when matplotlib
    could not be imported.
    """
    if not HAS_PLT:
        return
    section("7. Generating Plots")

    raw_err = cv - gt
    fitted = a * cv + b
    fitted_err = fitted - gt

    fig, (ax_scatter, ax_hist, ax_resid) = plt.subplots(1, 3, figsize=(16, 5))

    # 1. Scatter: CV vs GT with regression line
    lower = min(cv.min(), gt.min()) - 0.1
    upper = max(cv.max(), gt.max()) + 0.1
    bounds = [lower, upper]
    ax_scatter.scatter(cv, gt, alpha=0.6, s=30, label="Measurements")
    ax_scatter.plot(bounds, bounds, "k--", alpha=0.3, label="y=x (perfect)")
    xs = np.linspace(lower, upper, 100)
    ax_scatter.plot(xs, a * xs + b, "r-", linewidth=2,
                    label=f"Fit: y={a:.3f}x{b:+.3f}")
    ax_scatter.set_xlabel("CV Measured Diameter (cm)")
    ax_scatter.set_ylabel("Actual Diameter (cm)")
    ax_scatter.set_title("CV Measured vs Actual (Caliper)")
    ax_scatter.legend(fontsize=8)
    ax_scatter.set_aspect("equal")
    ax_scatter.grid(True, alpha=0.3)

    # 2. Error distribution
    ax_hist.hist(raw_err, bins=15, alpha=0.5, label=f"Raw (μ={np.mean(raw_err):+.3f})")
    ax_hist.hist(fitted_err, bins=15, alpha=0.5, label=f"Calibrated (μ={np.mean(fitted_err):+.3f})")
    ax_hist.axvline(0, color="k", linestyle="--", alpha=0.3)
    ax_hist.set_xlabel("Error (cm)")
    ax_hist.set_ylabel("Count")
    ax_hist.set_title("Error Distribution: Before vs After Calibration")
    ax_hist.legend(fontsize=8)
    ax_hist.grid(True, alpha=0.3)

    # 3. Residuals vs predicted
    ax_resid.scatter(fitted, fitted_err, alpha=0.6, s=30)
    ax_resid.axhline(0, color="k", linestyle="--", alpha=0.3)
    ax_resid.set_xlabel("Calibrated Diameter (cm)")
    ax_resid.set_ylabel("Residual (cm)")
    ax_resid.set_title("Residuals After Calibration")
    ax_resid.grid(True, alpha=0.3)

    plt.tight_layout()
    plot_path = os.path.join(out_dir, "calibration_analysis.png")
    plt.savefig(plot_path, dpi=150)
    print(f" Saved plot: {plot_path}")
    plt.close()
def main():
    """Run the full calibration analysis on the batch results file.

    Reads ``output/batch/batch_results.json`` located two directory levels
    above this script, runs every analysis phase in order, prints the fitted
    coefficients and writes them to ``calibration_coefficients.json`` in the
    same ``output/batch`` directory.
    """
    batch_dir = Path(__file__).resolve().parent.parent / "output" / "batch"
    out_dir = str(batch_dir)
    results = load_results(str(batch_dir / "batch_results.json"))
    print(f"Loaded {len(results)} results")

    # Phase 3: checks that only print a report
    for report in (analyze_scale_stability, analyze_repeatability, check_ground_truth_sanity):
        report(results)

    # Phase 4: error statistics, fit, plots (the raw error array is unused here)
    cv, gt, _ = bias_analysis(results)
    a, b = linear_regression(cv, gt, results)
    generate_plots(cv, gt, a, b, out_dir)

    # Summary
    section("SUMMARY — Calibration Coefficients")
    print(f" actual_diameter = {a:.6f} × measured_diameter + ({b:.6f})")
    print(f" slope (a) = {a:.6f}")
    print(f" offset (b) = {b:.6f}")
    print(f"\n Save these to the pipeline configuration.")

    # Write coefficients to file
    coeff_path = os.path.join(out_dir, "calibration_coefficients.json")
    with open(coeff_path, "w") as f:
        coefficients = {
            "slope": round(a, 6),
            "intercept": round(b, 6),
            "description": "actual = slope * measured + intercept",
            "n_samples": len(cv),
            "dataset": "input/sample (10 people × 3 fingers × 2 shots)",
        }
        json.dump(coefficients, f, indent=2)
    print(f" Saved coefficients: {coeff_path}")


if __name__ == "__main__":
    main()