detection-heads / scripts /_person_image_push.py

update repository

74e3c01 about 1 month ago

5.16 kB

	"""Push image-level person classifier to 99% precision + 95% recall.
	Bigger model, focal loss, longer training, 5-fold CV."""
	import json, os, torch, torch.nn as nn, torch.nn.functional as F, time
	from pycocotools.coco import COCO

	# Dataset locations are taken from the environment; a missing variable
	# raises KeyError right away.
	COCO_ROOT = os.environ["ARENA_COCO_ROOT"]
	VAL_CACHE = os.environ["ARENA_VAL_CACHE"]
	SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

	# Feature-channel selection: the unique, sorted indices from the genome of
	# the first record whose "K" equals 100.
	genome_path = os.path.join(SCRIPT_DIR, "..", "circuit", "evolved_extreme.json")
	with open(genome_path) as fh:
	    evolved = json.load(fh)
	k100_records = [rec for rec in evolved if rec["K"] == 100]
	dims = sorted(set(k100_records[0]["genome"]))
	N = len(dims)

	# NOTE(review): weights_only=False lets torch.load unpickle arbitrary
	# objects, so VAL_CACHE must point at a trusted file.
	val = torch.load(VAL_CACHE, map_location="cpu", weights_only=False)
	annotations_file = os.path.join(COCO_ROOT, "annotations", "instances_val2017.json")
	coco = COCO(annotations_file)

	def cofiber_decompose(f, n_scales):
	    """Split a feature map into ``n_scales`` multi-resolution bands.

	    Each step average-pools the current map by a factor of 2, upsamples the
	    pooled map back bilinearly, and stores the difference (the detail lost
	    by pooling). The pooled map is the input to the next step.

	    Args:
	        f: 4-D tensor; the last two axes are pooled and interpolated.
	        n_scales: number of bands to return. Values <= 1 return ``[f]``.

	    Returns:
	        A list of ``n_scales - 1`` detail bands at successively halved
	        resolution, followed by the coarsest pooled map.
	    """
	    bands = []
	    current = f
	    level = 1
	    while level < n_scales:
	        coarse = F.avg_pool2d(current, 2)
	        upsampled = F.interpolate(
	            coarse,
	            size=current.shape[2:],
	            mode="bilinear",
	            align_corners=False,
	        )
	        bands.append(current - upsampled)
	        current = coarse
	        level += 1
	    # Whatever is left after the last pooling step is the final band.
	    bands.append(current)
	    return bands

	# Build one fixed-length descriptor and one binary "person" label per
	# cached validation item.
	print("Pre-computing image vectors (92 evolved dims, max-pool)...", flush=True)
	all_vecs = []
	all_labels = []
	n_items = len(val)
	for idx in range(n_items):
	    item = val[idx]
	    spatial = item["spatial"].unsqueeze(0).float()

	    # Layer-normalize every spatial position of every scale over channels,
	    # flattening each scale to (positions, channels).
	    per_scale = [
	        F.layer_norm(
	            level.permute(0, 2, 3, 1).reshape(-1, level.shape[1]),
	            [level.shape[1]],
	        )
	        for level in cofiber_decompose(spatial, 3)
	    ]

	    # Keep only the selected channels, then concatenate max-pool and
	    # mean-pool over all positions (2 * len(dims) values).
	    tokens = torch.cat(per_scale)[:, dims]
	    pooled_max = tokens.max(dim=0).values
	    pooled_mean = tokens.mean(dim=0)
	    all_vecs.append(torch.cat([pooled_max, pooled_mean]))

	    # Positive when the image has at least one non-crowd annotation of
	    # category id 1.
	    person_ann_ids = coco.getAnnIds(
	        imgIds=int(item["img_id"]), catIds=[1], iscrowd=False
	    )
	    all_labels.append(1.0 if len(person_ann_ids) > 0 else 0.0)

	    done = idx + 1
	    if done % 1000 == 0:
	        print(f" {done}/{n_items}", flush=True)

	X = torch.stack(all_vecs).cuda()  # (num_images, 2 * len(dims))
	Y = torch.tensor(all_labels).cuda()
	feat_dim = X.shape[1]
	print(f" {len(Y)} images, {int(Y.sum())} person, {feat_dim} feature dims\n")

	def focal_bce(logits, targets, alpha=0.25, gamma=2.0):
	    """Binary focal loss computed from raw logits.

	    Down-weights well-classified examples by ``(1 - p_t) ** gamma`` and
	    balances the classes with ``alpha``.

	    Args:
	        logits: raw (pre-sigmoid) scores.
	        targets: float tensor of the same shape as ``logits``; the callers
	            in this file pass 0.0 / 1.0 labels.
	        alpha: weight applied to positive targets; negatives get
	            ``1 - alpha``.
	        gamma: focusing exponent; ``gamma=0`` reduces to alpha-weighted BCE.

	    Returns:
	        Scalar tensor: the mean focal loss over all elements.
	    """
	    p = logits.sigmoid()
	    ce = F.binary_cross_entropy_with_logits(logits, targets, reduction="none")
	    # p_t: probability the model assigns to the true class.
	    pt = p * targets + (1 - p) * (1 - targets)
	    # alpha_t: per-element class-balancing weight.
	    at = alpha * targets + (1 - alpha) * (1 - targets)
	    # The source line had lost its operators; this is the focal-loss form
	    # alpha_t * (1 - p_t) ** gamma * CE.
	    return (at * (1 - pt) ** gamma * ce).mean()

	# Candidate MLP heads compared under 5-fold CV. Each entry is
	# (display name, factory taking the input feature dimension).
	def _mlp(in_dim, hidden, p_drop, n_dropout):
	    """Build Linear -> GELU (-> Dropout) stacks ending in a 1-unit Linear.

	    Dropout with probability ``p_drop`` follows only the first
	    ``n_dropout`` hidden layers.
	    """
	    layers = []
	    prev = in_dim
	    for i, width in enumerate(hidden):
	        layers.append(nn.Linear(prev, width))
	        layers.append(nn.GELU())
	        if i < n_dropout:
	            layers.append(nn.Dropout(p_drop))
	        prev = width
	    layers.append(nn.Linear(prev, 1))
	    return nn.Sequential(*layers)


	architectures = [
	    ("184->128->64->1", lambda d: _mlp(d, (128, 64), 0.1, 2)),
	    ("184->256->128->64->1", lambda d: _mlp(d, (256, 128, 64), 0.1, 2)),
	    ("184->512->256->128->1", lambda d: _mlp(d, (512, 256, 128), 0.15, 2)),
	]

	# 5-fold cross-validation: for every architecture, train a fresh model on
	# four contiguous folds, score the held-out fold, then report pooled
	# out-of-fold precision / recall / F1 at a sweep of thresholds.
	for name, make_model in architectures:
	    n_folds = 5
	    # Derived from the data instead of a hard-coded 1000; identical when
	    # the cache holds 5000 items.
	    fold_size = len(val) // n_folds
	    all_scores = []
	    all_gt = []

	    for fold in range(n_folds):
	        fold_lo = fold * fold_size
	        # The last fold absorbs any remainder so every item is scored once.
	        fold_hi = (fold + 1) * fold_size if fold < n_folds - 1 else len(val)
	        test_mask = torch.zeros(len(val), dtype=torch.bool, device="cuda")
	        test_mask[fold_lo:fold_hi] = True
	        train_mask = ~test_mask

	        model = make_model(feat_dim).cuda()
	        n_params = sum(p.numel() for p in model.parameters())
	        opt = torch.optim.AdamW(model.parameters(), lr=5e-4, weight_decay=1e-3)
	        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=500)

	        # The training split is fixed within a fold; select it once rather
	        # than re-applying the mask every epoch.
	        fold_x = X[train_mask]
	        fold_y = Y[train_mask]

	        model.train()
	        for epoch in range(500):
	            idx = torch.randperm(len(fold_x), device="cuda")
	            train_x = fold_x[idx]
	            train_y = fold_y[idx]
	            for start in range(0, len(train_x), 128):
	                batch_x = train_x[start:start + 128]
	                batch_y = train_y[start:start + 128]
	                # squeeze(-1) drops only the output-unit axis, so a batch
	                # of one sample keeps its batch dimension.
	                logits = model(batch_x).squeeze(-1)
	                loss = focal_bce(logits, batch_y)
	                opt.zero_grad()
	                loss.backward()
	                opt.step()
	            # One scheduler step per epoch, matching T_max=500.
	            scheduler.step()

	        model.eval()
	        with torch.no_grad():
	            scores = model(X[test_mask]).squeeze(-1).sigmoid()
	        all_scores.append(scores.cpu())
	        all_gt.append(Y[test_mask].cpu())

	    all_scores = torch.cat(all_scores)
	    all_gt = torch.cat(all_gt).bool()

	    print(f"\n{name} ({n_params} params):")
	    print(f" {'Thresh':>6s} {'TP':>5s} {'FP':>5s} {'FN':>5s} {'TN':>5s} {'Prec':>6s} {'Rec':>6s} {'F1':>6s}")
	    for t in [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]:
	        pred = all_scores > t
	        tp = (pred & all_gt).sum().item()
	        fp = (pred & ~all_gt).sum().item()
	        fn = (~pred & all_gt).sum().item()
	        tn = (~pred & ~all_gt).sum().item()
	        # max(..., 1) guards the division when nothing is predicted or
	        # nothing is positive.
	        prec = tp / max(tp + fp, 1)
	        rec = tp / max(tp + fn, 1)
	        f1 = 2 * prec * rec / max(prec + rec, 1e-9)
	        # "***": precision >= 0.99 and recall >= 0.90; "<<": precision only.
	        marker = " ***" if prec >= 0.99 and rec >= 0.90 else (" <<" if prec >= 0.99 else "")
	        print(f" {t:6.2f} {tp:5d} {fp:5d} {fn:5d} {tn:5d} {prec:6.3f} {rec:6.3f} {f1:6.3f}{marker}")