Spaces:

feng-x
/

ring-sizer

Running

App Files Files Community

ring-sizer / script /analyze_calibration.py

feng-x

Upload folder using huggingface_hub

8bc7d2f verified about 1 month ago

raw

history blame contribute delete

11.4 kB

	#!/usr/bin/env python3
	"""Analysis & regression script for calibration dataset.

	Performs:
	1. px/cm stability analysis
	2. A vs B repeatability
	3. Ground truth sanity check (π×diameter vs circumference)
	4. Scatter plot & bias analysis
	5. Linear regression with leave-one-person-out cross-validation
	"""

	import json
	import math
	import os
	from pathlib import Path

	import numpy as np

	# matplotlib is an optional dependency: when any of its imports fails, the
	# flag below stays False, a warning is printed, and plotting is skipped.
	# The backend is switched to "Agg" before pyplot is imported.
	HAS_PLT = False
	try:
	    import matplotlib
	    matplotlib.use("Agg")
	    import matplotlib.pyplot as plt
	    HAS_PLT = True
	except ImportError:
	    print("Warning: matplotlib not available, skipping plots")


	def load_results(path: str) -> list[dict]:
	    """Read the JSON file at *path* (decoded as UTF-8) and return its content.

	    The rest of this script treats the result as a list of per-measurement
	    dicts; this function simply returns whatever top-level JSON value the
	    file holds.
	    """
	    raw_text = Path(path).read_text(encoding="utf-8")
	    return json.loads(raw_text)


	def section(title: str):
	    """Print *title* as a banner framed by two 60-character ``=`` rules.

	    A blank line is emitted before the first rule.
	    """
	    rule = "=" * 60
	    print(f"\n{rule}")
	    print(f" {title}")
	    print(rule)


	def analyze_scale_stability(results: list[dict]) -> tuple[float, float]:
	    """Phase 3a: report px/cm scale stability across all images.

	    Each image contributes the first truthy ``cv_scale_px_per_cm`` found
	    among its records, so an image with several records is counted once.

	    Args:
	        results: Records carrying ``image`` and ``cv_scale_px_per_cm`` keys.

	    Returns:
	        ``(mean, std)`` of the per-image scales (``np.std`` default, i.e.
	        population standard deviation), or ``(nan, nan)`` when no record
	        has a usable scale.
	    """
	    section("1. Card Detection / px/cm Stability")

	    scales = {}
	    for r in results:
	        img = r["image"]
	        # Falsy scales (None, 0) are skipped; first usable value per image wins.
	        if img not in scales and r["cv_scale_px_per_cm"]:
	            scales[img] = r["cv_scale_px_per_cm"]

	    if not scales:
	        # Without this guard np.mean() warns on the empty list and min()
	        # raises ValueError further down.
	        print(" No images with a px/cm scale; skipping stability analysis.")
	        return float("nan"), float("nan")

	    vals = list(scales.values())
	    mean_s = np.mean(vals)
	    std_s = np.std(vals)
	    cv_pct = (std_s / mean_s) * 100
	    lo, hi = min(vals), max(vals)
	    spread = hi - lo

	    print(f" Images analyzed: {len(scales)}")
	    print(f" Mean px/cm: {mean_s:.2f}")
	    print(f" Std px/cm: {std_s:.2f}")
	    print(f" CV%: {cv_pct:.2f}%")
	    print(f" Range: {lo:.2f} — {hi:.2f}")
	    print(f" Max spread: {spread:.2f} px/cm ({spread/mean_s*100:.2f}%)")

	    # Per-image table
	    print(f"\n {'Image':<16} {'px/cm':>8} {'Δ from mean':>12}")
	    print(f" {'-'*38}")
	    for img, s in sorted(scales.items()):
	        print(f" {img:<16} {s:>8.2f} {s-mean_s:>+12.2f}")

	    return mean_s, std_s


	def analyze_repeatability(results: list[dict]) -> list[float]:
	    """Phase 3b: compare shot A against shot B for every (person, finger).

	    Args:
	        results: Records carrying ``person``, ``finger_en``, ``shot`` and
	            ``cv_diameter_cm`` keys.

	    Returns:
	        The absolute A-vs-B diameter differences (cm), one per pair for
	        which both shots have a truthy diameter; empty if there is none.
	    """
	    section("2. A vs B Repeatability")

	    # Group by (person, finger): {key: {shot: diameter}}.  A later record
	    # with the same key and shot overwrites the earlier one.
	    pairs = {}
	    for r in results:
	        key = (r["person"], r["finger_en"])
	        pairs.setdefault(key, {})[r["shot"]] = r["cv_diameter_cm"]

	    diffs = []
	    print(f" {'Person':<10} {'Finger':<8} {'Shot A':>8} {'Shot B':>8} {'Δ(B-A)':>8} {'%diff':>7}")
	    print(f" {'-'*53}")
	    for (person, finger), shots in sorted(pairs.items()):
	        a = shots.get("A")
	        b = shots.get("B")
	        # Pairs with a missing or falsy measurement are left out.
	        if a and b:
	            d = b - a
	            # Percentage is relative to the mean of the two shots.
	            pct = abs(d) / ((a + b) / 2) * 100
	            diffs.append(abs(d))
	            print(f" {person:<10} {finger:<8} {a:>8.3f} {b:>8.3f} {d:>+8.3f} {pct:>6.1f}%")

	    if diffs:
	        # Plain "|" here: "\|" is an invalid escape sequence that prints a
	        # stray backslash and triggers a SyntaxWarning on recent Pythons.
	        print(f"\n Mean |A-B| difference: {np.mean(diffs):.4f} cm")
	        print(f" Max |A-B| difference: {max(diffs):.4f} cm")
	        print(f" Std |A-B| difference: {np.std(diffs):.4f} cm")
	        print(f" 95th percentile: {np.percentile(diffs, 95):.4f} cm")

	    return diffs


	def check_ground_truth_sanity(results: list[dict]):
	    """Phase 3c: compare ground-truth circumference against π × diameter.

	    Only the first record seen for each (person, finger_cn) is inspected;
	    records whose ``gt_diameter_cm`` or ``gt_circumference_cm`` is falsy are
	    not tabulated.

	    Args:
	        results: Records carrying ``person``, ``finger_cn``,
	            ``gt_diameter_cm`` and ``gt_circumference_cm`` keys.
	    """
	    section("3. Ground Truth Sanity (π×diameter vs circumference)")

	    seen = set()
	    diffs = []
	    print(f" {'Person':<10} {'Finger':<6} {'Diam':>6} {'Circ':>6} {'π×D':>6} {'Δ':>7} {'%err':>6}")
	    print(f" {'-'*55}")

	    for r in results:
	        key = (r["person"], r["finger_cn"])
	        if key in seen:
	            continue
	        seen.add(key)

	        d = r["gt_diameter_cm"]
	        c = r["gt_circumference_cm"]
	        if d and c:
	            pi_d = math.pi * d
	            diff = c - pi_d
	            pct = diff / c * 100
	            diffs.append(diff)
	            print(f" {r['person']:<10} {r['finger_cn']:<6} {d:>6.2f} {c:>6.1f} {pi_d:>6.2f} {diff:>+7.2f} {pct:>+5.1f}%")

	    if diffs:
	        mean_diff = np.mean(diffs)
	        print(f"\n Mean (circ - π×diam): {mean_diff:+.3f} cm")
	        # Only claim the result is "expected" when the data actually shows
	        # circumference exceeding π×diameter on average.
	        if mean_diff > 0:
	            print(" This is expected: circumference > π×diameter because")
	            print(" fingers are not perfect circles (slightly oval/flattened).")
	        else:
	            print(" Unexpected: circumference <= π×diameter on average;")
	            print(" re-check the ground-truth measurements.")


	def bias_analysis(results: list[dict]) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
	    """Phase 4a: print accuracy and bias statistics of CV vs ground truth.

	    Args:
	        results: Records carrying ``cv_diameter_cm`` and ``gt_diameter_cm``
	            keys; records where either value is falsy are ignored.

	    Returns:
	        ``(cv, gt, errors)`` arrays over the retained records, where
	        ``errors = cv - gt``.

	    Raises:
	        ValueError: If no record has both diameters.
	    """
	    section("4. Accuracy & Bias Analysis")

	    valid = [r for r in results if r["cv_diameter_cm"] and r["gt_diameter_cm"]]
	    if not valid:
	        # Fail with a clear message instead of NumPy's empty-reduction error
	        # (np.max on a zero-size array) after a run of nan warnings.
	        raise ValueError(
	            "bias_analysis: no records with both cv_diameter_cm and gt_diameter_cm"
	        )

	    cv = np.array([r["cv_diameter_cm"] for r in valid])
	    gt = np.array([r["gt_diameter_cm"] for r in valid])
	    errors = cv - gt
	    # gt is non-zero for every retained record (falsy values were filtered).
	    pct_errors = errors / gt * 100
	    abs_errors = np.abs(errors)

	    print(f" N = {len(valid)} measurements")
	    print(f" Mean error (CV-GT): {np.mean(errors):+.4f} cm")
	    print(f" Median error: {np.median(errors):+.4f} cm")
	    print(f" Std of error: {np.std(errors):.4f} cm")
	    print(f" Mean % error: {np.mean(pct_errors):+.1f}%")
	    print(f" MAE (absolute): {np.mean(abs_errors):.4f} cm")
	    print(f" Max error: {np.max(abs_errors):.4f} cm")
	    print(f" RMSE: {np.sqrt(np.mean(errors**2)):.4f} cm")

	    # Correlation
	    corr = np.corrcoef(cv, gt)[0, 1]
	    print(f" Pearson r: {corr:.4f}")
	    print(f" R²: {corr**2:.4f}")

	    return cv, gt, errors


	def _fit_line(x: np.ndarray, y: np.ndarray) -> tuple[float, float]:
	    """Return ``(slope, intercept)`` of the least-squares line ``y ≈ slope*x + intercept``."""
	    design = np.vstack([x, np.ones(len(x))]).T
	    (slope, intercept), *_ = np.linalg.lstsq(design, y, rcond=None)
	    return slope, intercept


	def linear_regression(cv: np.ndarray, gt: np.ndarray, results: list[dict]):
	    """Phase 4b: OLS calibration fit plus leave-one-person-out validation.

	    Fits ``gt = a * cv + b`` on the full arrays, prints the post-calibration
	    error statistics, then refits once per person with that person's
	    records held out and reports raw vs calibrated error on the held-out
	    records.

	    Args:
	        cv: Measured diameters used for the full fit.
	        gt: Ground-truth diameters aligned with ``cv``.
	        results: Records carrying ``person``, ``cv_diameter_cm`` and
	            ``gt_diameter_cm`` keys, used for the cross-validation folds;
	            records where either diameter is falsy are ignored.

	    Returns:
	        ``(a, b)``: slope and intercept of the fit on the full data.
	    """
	    section("5. Linear Regression Calibration")

	    # Fit: gt = a * cv + b
	    a, b = _fit_line(cv, gt)
	    cal_errors = (a * cv + b) - gt

	    print(f" Model: actual = {a:.4f} × measured + {b:.4f}")
	    print(f" (i.e., slope={a:.4f}, intercept={b:.4f})")
	    print(f"\n After calibration:")
	    print(f" Mean error: {np.mean(cal_errors):+.4f} cm")
	    print(f" MAE: {np.mean(np.abs(cal_errors)):.4f} cm")
	    print(f" Max error: {np.max(np.abs(cal_errors)):.4f} cm")
	    print(f" RMSE: {np.sqrt(np.mean(cal_errors**2)):.4f} cm")

	    # Leave-one-person-out cross-validation
	    section("6. Leave-One-Person-Out Cross-Validation")

	    # Filter the usable records once instead of re-scanning per fold.
	    valid = [r for r in results if r["cv_diameter_cm"] and r["gt_diameter_cm"]]
	    owners = [r["person"] for r in valid]
	    cv_all = np.array([r["cv_diameter_cm"] for r in valid], dtype=float)
	    gt_all = np.array([r["gt_diameter_cm"] for r in valid], dtype=float)

	    raw_abs = []
	    cal_abs = []

	    print(f" {'Person':<10} {'N':>3} {'a':>7} {'b':>7} {'MAE_raw':>8} {'MAE_cal':>8} {'Max_cal':>8}")
	    print(f" {'-'*57}")

	    for holdout in sorted(set(owners)):
	        is_test = np.array([p == holdout for p in owners], dtype=bool)
	        train_cv, train_gt = cv_all[~is_test], gt_all[~is_test]
	        test_cv, test_gt = cv_all[is_test], gt_all[is_test]
	        n_test = int(is_test.sum())

	        # A line needs at least two training points; this also covers the
	        # single-person dataset, where the training set would be empty.
	        if train_cv.size < 2:
	            print(f" {holdout:<10} {n_test:>3} skipped (fewer than 2 training samples)")
	            continue

	        a_fold, b_fold = _fit_line(train_cv, train_gt)

	        raw_errors = np.abs(test_cv - test_gt)
	        cal_errors_fold = np.abs((a_fold * test_cv + b_fold) - test_gt)

	        raw_abs.extend(raw_errors.tolist())
	        cal_abs.extend(cal_errors_fold.tolist())

	        print(f" {holdout:<10} {n_test:>3} {a_fold:>7.4f} {b_fold:>+7.4f} "
	              f"{np.mean(raw_errors):>8.4f} {np.mean(cal_errors_fold):>8.4f} "
	              f"{np.max(cal_errors_fold):>8.4f}")

	    if not raw_abs:
	        # No fold could be evaluated; averaging would only produce nan.
	        print("\n No cross-validation folds could be evaluated.")
	        return a, b

	    raw_abs = np.array(raw_abs)
	    cal_abs = np.array(cal_abs)
	    raw_mae = np.mean(raw_abs)
	    cal_mae = np.mean(cal_abs)

	    print(f"\n Cross-validated results (all holdout predictions):")
	    print(f" Raw MAE: {raw_mae:.4f} cm")
	    print(f" Cal MAE: {cal_mae:.4f} cm")
	    print(f" Raw RMSE: {np.sqrt(np.mean(raw_abs**2)):.4f} cm")
	    print(f" Cal RMSE: {np.sqrt(np.mean(cal_abs**2)):.4f} cm")
	    # The relative improvement is undefined when the raw error is zero.
	    if raw_mae > 0:
	        print(f" Improvement: {(1 - cal_mae/raw_mae)*100:.1f}% reduction in MAE")

	    return a, b


	def generate_plots(cv: np.ndarray, gt: np.ndarray, a: float, b: float, out_dir: str):
	    """Render the calibration figure and save it under *out_dir*.

	    The figure has three panels: measured vs actual scatter with the fitted
	    line, raw vs calibrated error histograms, and calibrated residuals.
	    Does nothing when matplotlib could not be imported (``HAS_PLT`` false).

	    Args:
	        cv: Measured diameters.
	        gt: Ground-truth diameters aligned with ``cv``.
	        a: Calibration slope.
	        b: Calibration intercept.
	        out_dir: Directory receiving ``calibration_analysis.png``; created
	            if it does not exist.
	    """
	    if not HAS_PLT:
	        return

	    section("7. Generating Plots")

	    errors = cv - gt
	    calibrated = a * cv + b
	    cal_errors = calibrated - gt

	    fig, axes = plt.subplots(1, 3, figsize=(16, 5))
	    try:
	        # 1. Scatter: CV vs GT with regression line
	        ax = axes[0]
	        ax.scatter(cv, gt, alpha=0.6, s=30, label="Measurements")
	        lim = [min(cv.min(), gt.min()) - 0.1, max(cv.max(), gt.max()) + 0.1]
	        ax.plot(lim, lim, "k--", alpha=0.3, label="y=x (perfect)")
	        x_fit = np.linspace(lim[0], lim[1], 100)
	        ax.plot(x_fit, a * x_fit + b, "r-", linewidth=2,
	                label=f"Fit: y={a:.3f}x{b:+.3f}")
	        ax.set_xlabel("CV Measured Diameter (cm)")
	        ax.set_ylabel("Actual Diameter (cm)")
	        ax.set_title("CV Measured vs Actual (Caliper)")
	        ax.legend(fontsize=8)
	        ax.set_aspect("equal")
	        ax.grid(True, alpha=0.3)

	        # 2. Error distribution
	        ax = axes[1]
	        ax.hist(errors, bins=15, alpha=0.5, label=f"Raw (μ={np.mean(errors):+.3f})")
	        ax.hist(cal_errors, bins=15, alpha=0.5, label=f"Calibrated (μ={np.mean(cal_errors):+.3f})")
	        ax.axvline(0, color="k", linestyle="--", alpha=0.3)
	        ax.set_xlabel("Error (cm)")
	        ax.set_ylabel("Count")
	        ax.set_title("Error Distribution: Before vs After Calibration")
	        ax.legend(fontsize=8)
	        ax.grid(True, alpha=0.3)

	        # 3. Residuals vs predicted
	        ax = axes[2]
	        ax.scatter(calibrated, cal_errors, alpha=0.6, s=30)
	        ax.axhline(0, color="k", linestyle="--", alpha=0.3)
	        ax.set_xlabel("Calibrated Diameter (cm)")
	        ax.set_ylabel("Residual (cm)")
	        ax.set_title("Residuals After Calibration")
	        ax.grid(True, alpha=0.3)

	        fig.tight_layout()
	        os.makedirs(out_dir, exist_ok=True)
	        plot_path = os.path.join(out_dir, "calibration_analysis.png")
	        fig.savefig(plot_path, dpi=150)
	        print(f" Saved plot: {plot_path}")
	    finally:
	        # Close this specific figure even if plotting or saving failed.
	        plt.close(fig)


	def main():
	    """Run the full calibration analysis on ``output/batch/batch_results.json``.

	    Paths are resolved relative to the parent of this script's directory.
	    Prints every analysis section, saves the plot (when matplotlib is
	    available) and writes the fitted coefficients to
	    ``output/batch/calibration_coefficients.json``.
	    """
	    base_dir = Path(__file__).resolve().parent.parent
	    out_dir = base_dir / "output" / "batch"
	    results_path = out_dir / "batch_results.json"

	    results = load_results(str(results_path))
	    print(f"Loaded {len(results)} results")

	    # Phase 3
	    analyze_scale_stability(results)
	    analyze_repeatability(results)
	    check_ground_truth_sanity(results)

	    # Phase 4
	    cv, gt, _ = bias_analysis(results)
	    a, b = linear_regression(cv, gt, results)
	    generate_plots(cv, gt, a, b, str(out_dir))

	    # Summary
	    section("SUMMARY — Calibration Coefficients")
	    print(f" actual_diameter = {a:.6f} × measured_diameter + ({b:.6f})")
	    print(f" slope (a) = {a:.6f}")
	    print(f" offset (b) = {b:.6f}")
	    print(f"\n Save these to the pipeline configuration.")

	    # Write coefficients to file.  float() turns the NumPy scalars into
	    # plain Python floats so JSON serialisation does not depend on the
	    # NumPy scalar type.
	    coeff_path = out_dir / "calibration_coefficients.json"
	    payload = {
	        "slope": round(float(a), 6),
	        "intercept": round(float(b), 6),
	        "description": "actual = slope * measured + intercept",
	        "n_samples": len(cv),
	        "dataset": "input/sample (10 people × 3 fingers × 2 shots)",
	    }
	    # Explicit encoding, matching how load_results() reads its input.
	    with open(coeff_path, "w", encoding="utf-8") as f:
	        json.dump(payload, f, indent=2)
	    print(f" Saved coefficients: {coeff_path}")


	if __name__ == "__main__":
	    main()