Initial release: badger-55 meter reader

3800bd2 verified 15 days ago

16.5 kB

	"""End-to-end inference demo on a single full-meter image.

	    full BGR frame ─► deskew ─► rectify (175 × 736 strip)
	                                            │
	                                            ▼
	                                8 × (105 × 86) slot crops
	                                            │
	                                            ▼
	                           DINOv2-small CLS features (8, 384)
	                                            │
	                                 ┌──────────┴──────────┐
	                                 ▼                     ▼
	                         digit_classifier   d4d5 / d6d7 Predictor90
	                          (10-way digit)       (theta + entropy)
	                                 └──────────┬──────────┘
	                                            ▼
	                       8-digit reading + per-slot annotation card

	The same `digit_classifier` (trained pooled across slots 4+5+6+7 — the
	slots in the dataset that exercise all 10 digit classes) is applied to
	slots 0–3 at inference. On captures from the source meter, slots 0–3
	will emit constants — that's because the upper drums of that specific
	meter didn't move during data collection, so the trained classifier
	sees an input that looks exactly like one digit class for those slots.

	Usage:
	python demo.py # pick one sample capture
	python demo.py --image PATH # supply your own 1920×1080 frame
	python demo.py --image PATH --out OUT.jpg
	"""
	from __future__ import annotations

	import argparse
	import sys
	import textwrap
	import time
	import os
	from pathlib import Path

	# Match train.py — turn on the high-perf downloader before importing
	# huggingface_hub. The default Python downloader silently throttles on
	# repeated single-file fetches against the same dataset.
	# setdefault() only fills in a value when the variable is not already
	# set, so anything exported in the caller's environment takes precedence.
	# Values are strings because os.environ only stores strings.
	os.environ.setdefault('HF_XET_HIGH_PERFORMANCE', '1')
	# NOTE(review): presumably the hub download timeout in seconds — confirm
	# against the huggingface_hub environment-variable documentation.
	os.environ.setdefault('HF_HUB_DOWNLOAD_TIMEOUT', '30')

	import cv2
	import numpy as np
	import pandas as pd
	import torch
	from huggingface_hub import hf_hub_download

	import models
	import rectifier


	# Directory containing this script; default paths are resolved against it.
	HERE = Path(__file__).parent
	# Folder holding the head checkpoints (*.pt) that infer_full_frame loads.
	WEIGHTS = HERE / 'weights'
	# Hugging Face dataset repo queried for a sample capture when no --image
	# is supplied on the command line.
	DATASET_ID = 'S3CUR/badger-55-watermeter'

	# ── inference ─────────────────────────────────────────────────────────
	def _load_head(head, weights_path, device):
	    """Move *head* to *device*, put it in eval mode and load its checkpoint."""
	    head = head.to(device).eval()
	    # weights_only=True restricts unpickling to tensors/plain containers.
	    head.load_state_dict(
	        torch.load(weights_path, map_location=device, weights_only=True))
	    return head


	def infer_full_frame(img_bgr: np.ndarray, weights_dir: Path,
	                     device: str | None = None):
	    """Run the full pipeline on one frame.

	    Args:
	        img_bgr: Full meter frame, handed unchanged to ``rectifier.rectify``.
	        weights_dir: Directory containing ``d4d5_predictor90.pt``,
	            ``d6d7_predictor90.pt`` and ``digit_classifier.pt``.
	        device: Torch device string. ``None`` selects ``'cuda'`` when
	            available, otherwise ``'cpu'``.

	    Returns:
	        A dict with:
	          - ``'reading'``: the eight per-slot consensus digits joined
	            into one string,
	          - ``'gallons'``: ``int(reading) / 10.0``,
	          - ``'tight'``: the rectified strip returned by the rectifier,
	          - ``'slots'``: the slot crops cut from that strip,
	          - ``'per_slot'``: one dict per slot (``slot``, ``digit``,
	            ``top1_prob``, ``theta_deg``, ``entropy``, ``source``,
	            ``cla_digit``, ``cla_prob``, ``p90_digit``, ``p90_prob``),
	          - ``'rectify_info'``: the info dict returned by the rectifier.

	    Raises:
	        RuntimeError: If the rectifier returns no strip.
	    """
	    if device is None:
	        device = 'cuda' if torch.cuda.is_available() else 'cpu'

	    tight, info = rectifier.rectify(img_bgr)
	    if tight is None:
	        raise RuntimeError(f"rectifier failed: {info.get('error', 'unknown')}")
	    slots = rectifier.tight_to_slots(tight)
	    slots_arr = models.slot_crops_to_array(slots)

	    dino = models.DinoV2(device=device)
	    feats = dino.features(slots_arr)  # (8, 384)

	    # Heads
	    d4d5 = _load_head(models.Predictor90(),
	                      weights_dir / 'd4d5_predictor90.pt', device)
	    d6d7 = _load_head(models.Predictor90(),
	                      weights_dir / 'd6d7_predictor90.pt', device)
	    digit_cls = _load_head(models.SlotClassifier(),
	                           weights_dir / 'digit_classifier.pt', device)

	    with torch.no_grad():
	        # Two heads on every applicable slot: classifier (10-way digit)
	        # and predictor90 (90-bin angular). d4d5_* trained on slots {4,5}
	        # but applies fine to slots 0..5 since the upper drums are visually
	        # similar; d6d7_* covers slots {6,7}.
	        cls_probs_all = digit_cls(feats).softmax(dim=-1)  # (8, 10)
	        cls_digits_all = cls_probs_all.argmax(dim=-1)
	        d4d5_p90 = models.predictor90_decode(d4d5(feats[0:6]))
	        d6d7_p90 = models.predictor90_decode(d6d7(feats[6:8]))

	    per_slot = []
	    for s in range(8):
	        cla_d = int(cls_digits_all[s].item())
	        cla_p = float(cls_probs_all[s, cla_d])

	        # Slots 0..5 index straight into the d4d5 output; slots 6..7 are
	        # rows 0..1 of the d6d7 output.
	        p90, i = (d4d5_p90, s) if s <= 5 else (d6d7_p90, s - 6)
	        p90_d = int(p90['digit'][i])
	        p90_t = float(p90['theta_deg'][i])
	        p90_p = float(p90['top1_prob'][i])
	        p90_h = float(p90['entropy'][i])

	        # ── Two-voter consensus ────────────────────────────────────────
	        # When CLA and P90 agree, take that. When they disagree, prefer
	        # P90 for d1-d7 (its θ disambiguates upper-drum mid-roll cleanly
	        # — empirically the right call when the slot ever sees motion).
	        # For d0, defer to CLA: it's a hard constant on this meter, and
	        # P90 on a constant slot is meaningless.
	        if cla_d == p90_d:
	            consensus, conf, source = cla_d, max(cla_p, p90_p), 'agree'
	        elif s == 0:
	            consensus, conf, source = cla_d, cla_p, 'classifier'
	        else:
	            consensus, conf, source = p90_d, p90_p, 'predictor90'

	        per_slot.append({
	            'slot': s,
	            'digit': consensus,
	            'top1_prob': conf,
	            'theta_deg': p90_t,
	            'entropy': p90_h,
	            'source': source,
	            'cla_digit': cla_d, 'cla_prob': cla_p,
	            'p90_digit': p90_d, 'p90_prob': p90_p,
	        })

	    reading_str = ''.join(str(p['digit']) for p in per_slot)
	    return {
	        'reading': reading_str,
	        'gallons': int(reading_str) / 10.0,
	        'tight': tight,
	        'slots': slots,
	        'per_slot': per_slot,
	        'rectify_info': info,
	    }


	# ── rendering ─────────────────────────────────────────────────────────
	# Layout constants — mirror the production renderer so the visual
	# language is familiar (header → frame → rectified strip → voter cards).
	# Dropped relative to production: SDR row, PREV-reading delta row, WARN
	# row, VLM voter, cascade-gate state promotion. None of those apply to a
	# standalone single-frame demo.
	# Geometry, all in pixels.
	CANVAS_W = 940  # width of the composed output canvas
	HEADER_H = 86  # height reserved for the header text band
	SOURCE_H = 540  # maximum height of the scaled source frame
	STRIP_MAX_H = 200  # maximum height of the scaled rectified strip
	CARD_PANEL_H = 168  # height of each voter card
	NSLOTS = 8  # number of cards the canvas width is divided into
	CARD_GAP = 6  # horizontal gap between neighbouring cards
	PAD = 4  # vertical padding between stacked sections

	# Colours are OpenCV-style (B, G, R) tuples.
	BG = (18, 18, 18)  # canvas background
	CARD_BG = (36, 36, 32)  # card fill
	CARD_BORDER = (64, 64, 60)  # card outline
	FG = (230, 230, 230)  # primary text
	DIM = (140, 140, 140)  # secondary text / labels
	NA = (90, 90, 90)  # placeholder for a missing vote
	AGREE = (80, 200, 60) # green
	DISAGREE = (60, 60, 240) # red (BGR)
	EDGE_C = (60, 180, 230) # yellow (BGR)

	# Maps the badge names returned by _state_for to their display colour.
	STATE_COL = {'OK': AGREE, 'EDGE': EDGE_C, 'DISPUTED': DISAGREE}

	# Single font face used for every text element.
	FONT = cv2.FONT_HERSHEY_SIMPLEX


	def _state_for(conf: float \| None) -> str:
	"""Confidence → state badge. Same thresholds as the production
	`_display_conf` color picker (≥0.85 green, ≥0.50 yellow, else red)."""
	if conf is None: return 'OK'
	if conf >= 0.85: return 'OK'
	if conf >= 0.50: return 'EDGE'
	return 'DISPUTED'


	def _draw_voter_row(out, x0, y, label, vote, agree, informational=False):
	    """Draw a single voter line (label + vote) inside a card.

	    The row is rendered in one of three ways, checked in this order:
	      1. ``vote is None``   → grey '-' placeholder after the label
	      2. ``informational``  → the vote as plain grey text, no chip/circle
	      3. otherwise          → vote on a filled chip plus a status dot,
	                              both coloured by ``agree``
	    """
	    baseline = y
	    cv2.putText(out, label, (x0 + 8, baseline), FONT, 0.40, DIM, 1)

	    # Plain-text variants: nothing but a single string after the label.
	    if vote is None or informational:
	        if vote is None:
	            text, origin, colour = '-', (x0 + 44, baseline), NA
	        else:
	            text, origin, colour = vote, (x0 + 43, baseline), DIM
	        cv2.putText(out, text, origin, FONT, 0.42, colour, 1)
	        return

	    # Chip variant: filled box behind dark text, then the agreement dot.
	    chip_colour = AGREE if agree else DISAGREE
	    chip_top_left = (x0 + 40, baseline - 10)
	    chip_bottom_right = (x0 + 56, baseline + 3)
	    cv2.rectangle(out, chip_top_left, chip_bottom_right, chip_colour, -1)
	    cv2.putText(out, vote, (x0 + 43, baseline), FONT, 0.42, (15, 15, 15), 1)
	    cv2.circle(out, (x0 + 70, baseline - 4), 4, chip_colour, -1)


	def _draw_card(canvas, x0, y0, card_w, card_h, per_slot):
	    """Paint one slot card onto *canvas*, top-left corner at (x0, y0).

	    *per_slot* is one entry of the inference result's ``per_slot`` list.
	    The keys read here are ``digit``, ``slot``, ``cla_digit`` and
	    ``p90_digit`` (required) plus ``top1_prob`` and ``theta_deg``
	    (optional; their element is skipped when absent or ``None``).
	    """
	    committed = str(per_slot['digit'])
	    conf = per_slot.get('top1_prob')
	    state = _state_for(conf)
	    state_col = STATE_COL[state]
	    right, bottom = x0 + card_w, y0 + card_h

	    # Filled backdrop, thin outline, then a state-coloured stripe down
	    # the left edge.
	    cv2.rectangle(canvas, (x0, y0), (right, bottom), CARD_BG, -1)
	    cv2.rectangle(canvas, (x0, y0), (right, bottom), CARD_BORDER, 1)
	    cv2.rectangle(canvas, (x0, y0), (x0 + 3, bottom), state_col, -1)

	    # Slot name ("d0".."d7") in the top-left corner.
	    cv2.putText(canvas, f"d{per_slot['slot']}", (x0 + 8, y0 + 14),
	                FONT, 0.40, DIM, 1)

	    # Large committed digit, horizontally centred; red when disputed.
	    (text_w, text_h), _ = cv2.getTextSize(committed, FONT, 1.3, 2)
	    digit_col = DISAGREE if state == 'DISPUTED' else FG
	    digit_org = (x0 + (card_w - text_w) // 2, y0 + 14 + text_h + 4)
	    cv2.putText(canvas, committed, digit_org, FONT, 1.3, digit_col, 2)

	    # Confidence percentage in the top-right corner. With a non-None
	    # confidence the state colour already follows the 0.85 / 0.50
	    # thresholds, so it doubles as the text colour.
	    if conf is not None:
	        cv2.putText(canvas, f"{int(conf * 100)}%",
	                    (right - 34, y0 + 14), FONT, 0.40, state_col, 1)

	    # θ readout, drawn whenever the record carries a theta value.
	    theta = per_slot.get('theta_deg')
	    if theta is not None:
	        cv2.putText(canvas, f"{int(theta)}deg",
	                    (x0 + card_w // 2 - 18, y0 + 80), FONT, 0.40, DIM, 1)

	    # Voter rows — CLA then P90. A head whose vote equals the committed
	    # digit gets the coloured chip; a dissenting head is shown in
	    # informational grey.
	    first_row_y = y0 + 90
	    row_step = 17
	    votes = (
	        ('CLA', str(per_slot['cla_digit'])),
	        ('P90', str(per_slot['p90_digit'])),
	    )
	    for row, (label, vote) in enumerate(votes):
	        matches = vote == committed
	        _draw_voter_row(canvas, x0, first_row_y + row * row_step,
	                        label, vote, matches,
	                        informational=not matches)


	def render_result(img_bgr: np.ndarray, result: dict, out_path: Path):
	    """Compose a production-style annotated image and write it to disk.

	    Layout, top to bottom:
	    header → original frame → rectified strip → 8 voter cards.

	    Args:
	        img_bgr: The source frame that was fed to inference.
	        result: Dict as returned by ``infer_full_frame``; the keys read
	            here are ``tight``, ``rectify_info``, ``reading``,
	            ``gallons`` and ``per_slot``.
	        out_path: Destination image path; the format follows the file
	            extension (handled by ``cv2.imwrite``).

	    Raises:
	        OSError: If ``cv2.imwrite`` reports that the file was not written.
	    """
	    canvas_w = CANVAS_W

	    # Scale the source frame to canvas_w, capped at SOURCE_H tall
	    src_h, src_w = img_bgr.shape[:2]
	    src_scale = min(canvas_w / src_w, SOURCE_H / src_h)
	    src_w_s = int(src_w * src_scale)
	    src_h_s = int(src_h * src_scale)
	    src_scaled = cv2.resize(img_bgr, (src_w_s, src_h_s),
	                            interpolation=cv2.INTER_LANCZOS4)

	    # Rectified strip — scale to canvas_w, capped at STRIP_MAX_H tall
	    tight = result['tight']
	    strip_h, strip_w = tight.shape[:2]
	    t_scale = min(canvas_w / strip_w, STRIP_MAX_H / strip_h)
	    tw_s = int(strip_w * t_scale)
	    th_s = int(strip_h * t_scale)
	    strip_scaled = cv2.resize(tight, (tw_s, th_s),
	                              interpolation=cv2.INTER_LANCZOS4)

	    # Card panel: NSLOTS equal-width cards separated by CARD_GAP
	    card_w = (canvas_w - (NSLOTS - 1) * CARD_GAP) // NSLOTS
	    card_h = CARD_PANEL_H

	    total_h = HEADER_H + src_h_s + th_s + card_h + 4 * PAD
	    canvas = np.full((total_h, canvas_w, 3), BG, dtype=np.uint8)

	    # ── header ────────────────────────────────────────────────────────
	    info = result['rectify_info']
	    cv2.putText(canvas, f"reading {result['reading']}",
	                (12, 32), FONT, 0.85, FG, 2)
	    cv2.putText(canvas, f"{result['gallons']:.1f} gallons",
	                (12, 58), FONT, 0.50, DIM, 1)
	    meta = (f"deskew {info['deskew_angle']:+.2f}deg | "
	            f"{info['n_windows']} windows | "
	            f"residual {info.get('mean_residual_px', 0):.2f}px")
	    # Right-align the metadata line against the canvas edge.
	    (mw, _), _ = cv2.getTextSize(meta, FONT, 0.42, 1)
	    cv2.putText(canvas, meta, (canvas_w - mw - 12, 32),
	                FONT, 0.42, DIM, 1)

	    # ── source frame ──────────────────────────────────────────────────
	    y = HEADER_H
	    src_x = (canvas_w - src_w_s) // 2
	    canvas[y:y + src_h_s, src_x:src_x + src_w_s] = src_scaled
	    y += src_h_s + PAD

	    # ── rectified strip ───────────────────────────────────────────────
	    strip_x = (canvas_w - tw_s) // 2
	    canvas[y:y + th_s, strip_x:strip_x + tw_s] = strip_scaled
	    y += th_s + PAD

	    # ── voter cards ───────────────────────────────────────────────────
	    for i, ps in enumerate(result['per_slot']):
	        cx = i * (card_w + CARD_GAP)
	        _draw_card(canvas, cx, y, card_w, card_h, ps)

	    # cv2.imwrite signals some failures through its boolean return value
	    # rather than an exception, so check it before claiming success.
	    if not cv2.imwrite(str(out_path), canvas):
	        raise OSError(f"failed to write {out_path}")
	    print(f"[render] wrote {out_path} ({canvas.shape[1]}×{canvas.shape[0]})")


	# ── pick a sample image from the HF dataset cache ─────────────────────
	def pick_sample_image(cache_dir: Path | None = None) -> np.ndarray:
	    """Fetch the captures parquet, pick one frame and decode it.

	    The dataset is self-contained in a single parquet — JPEG bytes are
	    inline — so this is one HTTP request and no loose-file roulette.

	    Args:
	        cache_dir: Optional download cache directory forwarded to
	            ``hf_hub_download``; when falsy the library default is used.

	    Returns:
	        The chosen capture decoded by ``cv2.imdecode`` with
	        ``cv2.IMREAD_COLOR``.

	    Raises:
	        RuntimeError: If the parquet has no rows, or the chosen row's
	            ``image_bytes`` cannot be decoded.
	    """
	    kwargs = {'repo_id': DATASET_ID, 'repo_type': 'dataset'}
	    if cache_dir:
	        kwargs['cache_dir'] = str(cache_dir)
	    print(f"[hf] fetching {DATASET_ID}:captures.parquet")
	    parquet_path = hf_hub_download(filename='captures.parquet', **kwargs)
	    caps_df = pd.read_parquet(parquet_path)
	    if caps_df.empty:
	        # Without this guard the .iloc[0] below fails with a bare IndexError.
	        raise RuntimeError("captures.parquet contains no rows")

	    # Prefer test-split captures with the most human-tagged slots (cleanest
	    # frames in the dataset).
	    cands = caps_df[caps_df['split'] == 'test'].sort_values(
	        'n_slots_known', ascending=False).head(10)
	    if cands.empty:
	        # No test split present: fall back to the first rows as stored.
	        cands = caps_df.head(10)
	    pick = cands.iloc[0]
	    print(f"[hf] picked capture_id={pick.get('capture_id')} "
	          f"captured_at={pick['captured_at']} "
	          f"n_known={pick['n_slots_known']}")

	    img = cv2.imdecode(np.frombuffer(pick['image_bytes'], np.uint8),
	                       cv2.IMREAD_COLOR)
	    if img is None:
	        raise RuntimeError("failed to decode image_bytes for the chosen row")
	    return img


	# ── main ──────────────────────────────────────────────────────────────
	def main():
	    """Command-line entry point.

	    Loads a frame (from ``--image`` or a dataset sample), runs
	    ``infer_full_frame`` with the bundled weights, prints the reading and
	    a per-slot breakdown, then writes the annotated image to ``--out``.
	    """
	    parser = argparse.ArgumentParser(
	        formatter_class=argparse.RawDescriptionHelpFormatter,
	        description=textwrap.dedent(__doc__),
	    )
	    parser.add_argument(
	        '--image', default=None,
	        help='Path to a 1920×1080 BGR meter capture; default = pick one from the dataset')
	    parser.add_argument('--out', default=str(HERE / 'demo_output.jpg'))
	    parser.add_argument('--cache-dir', default=None)
	    parser.add_argument(
	        '--device', default='cuda' if torch.cuda.is_available() else 'cpu')
	    args = parser.parse_args()

	    # Acquire the input frame: dataset sample unless a path was given.
	    if not args.image:
	        cache_dir = Path(args.cache_dir) if args.cache_dir else None
	        img = pick_sample_image(cache_dir)
	        print(f"[demo] loaded {img.shape[1]}×{img.shape[0]} from HF dataset row")
	    else:
	        img_path = Path(args.image)
	        img = cv2.imread(str(img_path))
	        if img is None:
	            sys.exit(f"cannot read {img_path}")
	        print(f"[demo] loaded {img.shape[1]}×{img.shape[0]} from {img_path}")

	    # Inference, timed wall-clock from call to summary print.
	    t0 = time.time()
	    result = infer_full_frame(img, WEIGHTS, device=args.device)
	    print(f"[infer] reading={result['reading']} "
	          f"({result['gallons']:.1f} gal) in {(time.time()-t0)*1000:.0f} ms")

	    # One summary line per slot: digit, optional θ/probability, vote source.
	    for ps in result['per_slot']:
	        pieces = [f"d{ps['slot']}={ps['digit']}"]
	        if ps['theta_deg'] is not None:
	            pieces.append(
	                f" θ={ps['theta_deg']:6.2f}° p={ps['top1_prob']:.3f}")
	        pieces.append(f" [{ps['source']}]")
	        print(" ", ''.join(pieces))

	    render_result(img, result, Path(args.out))


	if __name__ == '__main__':
	    main()