"""Train a tiny MLP on 92 evolved dims for image-level person classification.""" import json, os, torch, torch.nn as nn import torch.nn.functional as F from pycocotools.coco import COCO COCO_ROOT = os.environ["ARENA_COCO_ROOT"] VAL_CACHE = os.environ["ARENA_VAL_CACHE"] SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) with open(os.path.join(SCRIPT_DIR, "..", "circuit", "evolved_extreme.json")) as f: evolved = json.load(f) dims = sorted(list(set([r for r in evolved if r["K"] == 100][0]["genome"]))) N = len(dims) val = torch.load(VAL_CACHE, map_location="cpu", weights_only=False) coco = COCO(os.path.join(COCO_ROOT, "annotations", "instances_val2017.json")) PERSON_CAT = 1 def cofiber_decompose(f, n_scales): cofibers = []; residual = f for _ in range(n_scales - 1): omega = F.avg_pool2d(residual, 2) sigma_omega = F.interpolate(omega, size=residual.shape[2:], mode="bilinear", align_corners=False) cofibers.append(residual - sigma_omega); residual = omega cofibers.append(residual); return cofibers print("Pre-computing image vectors...", flush=True) all_vecs = [] all_labels = [] for idx in range(len(val)): item = val[idx] spatial = item["spatial"].unsqueeze(0).float() cofibers = cofiber_decompose(spatial, 3) feats = [] for cof in cofibers: B, C, Hc, Wc = cof.shape f = F.layer_norm(cof.permute(0, 2, 3, 1).reshape(-1, C), [C]) feats.append(f) all_vecs.append(torch.cat(feats)[:, dims].max(dim=0).values) hp = len(coco.getAnnIds(imgIds=int(item["img_id"]), catIds=[PERSON_CAT], iscrowd=False)) > 0 all_labels.append(1.0 if hp else 0.0) if (idx + 1) % 1000 == 0: print(f" {idx+1}/{len(val)}", flush=True) X = torch.stack(all_vecs).cuda() Y = torch.tensor(all_labels).cuda() # 5-fold CV with MLP print(f"\n5-fold CV with MLPs on {N} evolved dims\n", flush=True) for hidden, layers_desc in [(32, "92->32->1"), (64, "92->64->1"), (128, "92->64->64->1"), (256, "92->128->64->1")]: fold_size = 1000 all_tp = all_fp = all_fn = all_tn = 0 for fold in range(5): test_mask = torch.zeros(len(val), dtype=torch.bool, device="cuda") test_mask[fold * fold_size:(fold + 1) * fold_size] = True train_mask = ~test_mask train_x = X[train_mask] train_y = Y[train_mask] test_x = X[test_mask] test_y = Y[test_mask] # Build MLP if layers_desc == "92->32->1": model = nn.Sequential(nn.Linear(N, 32), nn.GELU(), nn.Linear(32, 1)).cuda() elif layers_desc == "92->64->1": model = nn.Sequential(nn.Linear(N, 64), nn.GELU(), nn.Linear(64, 1)).cuda() elif layers_desc == "92->64->64->1": model = nn.Sequential(nn.Linear(N, 64), nn.GELU(), nn.Linear(64, 64), nn.GELU(), nn.Linear(64, 1)).cuda() else: model = nn.Sequential(nn.Linear(N, 128), nn.GELU(), nn.Linear(128, 64), nn.GELU(), nn.Linear(64, 1)).cuda() n_params = sum(p.numel() for p in model.parameters()) opt = torch.optim.Adam(model.parameters(), lr=1e-3) # Train model.train() for epoch in range(200): idx = torch.randperm(train_x.shape[0], device="cuda") for start in range(0, len(idx), 256): batch = idx[start:start + 256] logits = model(train_x[batch]).squeeze() loss = F.binary_cross_entropy_with_logits(logits, train_y[batch]) opt.zero_grad(); loss.backward(); opt.step() # Eval at multiple thresholds model.eval() with torch.no_grad(): scores = model(test_x).squeeze().sigmoid() # Find best threshold for 99% precision best_t = 0.5 best_rec = 0.0 for t_int in range(50, 100): t = t_int / 100.0 pred = scores > t tp = (pred & test_y.bool()).sum().item() fp = (pred & ~test_y.bool()).sum().item() fn = (~pred & test_y.bool()).sum().item() prec = tp / max(tp + fp, 1) rec = tp / max(tp + fn, 1) if prec >= 0.99 and rec > best_rec: best_rec = rec best_t = t pred = scores > best_t tp = (pred & test_y.bool()).sum().item() fp = (pred & ~test_y.bool()).sum().item() fn = (~pred & test_y.bool()).sum().item() tn = (~pred & ~test_y.bool()).sum().item() all_tp += tp; all_fp += fp; all_fn += fn; all_tn += tn prec = all_tp / max(all_tp + all_fp, 1) rec = all_tp / max(all_tp + all_fn, 1) f1 = 2 * prec * rec / max(prec + rec, 1e-9) acc = (all_tp + all_tn) / 5000 print(f" {layers_desc:20s} ({n_params:5d} params): P={prec:.3f} R={rec:.3f} F1={f1:.3f} acc={acc:.3f} " f"(TP={all_tp} FP={all_fp} FN={all_fn} TN={all_tn})") # Also test at threshold 0.5 for best F1 print(f"\nSame models at threshold=0.5 (best F1):\n") for hidden, layers_desc in [(32, "92->32->1"), (64, "92->64->1"), (128, "92->64->64->1"), (256, "92->128->64->1")]: fold_size = 1000 all_tp = all_fp = all_fn = all_tn = 0 for fold in range(5): test_mask = torch.zeros(len(val), dtype=torch.bool, device="cuda") test_mask[fold * fold_size:(fold + 1) * fold_size] = True train_mask = ~test_mask train_x = X[train_mask]; train_y = Y[train_mask] test_x = X[test_mask]; test_y = Y[test_mask] if layers_desc == "92->32->1": model = nn.Sequential(nn.Linear(N, 32), nn.GELU(), nn.Linear(32, 1)).cuda() elif layers_desc == "92->64->1": model = nn.Sequential(nn.Linear(N, 64), nn.GELU(), nn.Linear(64, 1)).cuda() elif layers_desc == "92->64->64->1": model = nn.Sequential(nn.Linear(N, 64), nn.GELU(), nn.Linear(64, 64), nn.GELU(), nn.Linear(64, 1)).cuda() else: model = nn.Sequential(nn.Linear(N, 128), nn.GELU(), nn.Linear(128, 64), nn.GELU(), nn.Linear(64, 1)).cuda() n_params = sum(p.numel() for p in model.parameters()) opt = torch.optim.Adam(model.parameters(), lr=1e-3) model.train() for epoch in range(200): idx = torch.randperm(train_x.shape[0], device="cuda") for start in range(0, len(idx), 256): batch = idx[start:start + 256] logits = model(train_x[batch]).squeeze() loss = F.binary_cross_entropy_with_logits(logits, train_y[batch]) opt.zero_grad(); loss.backward(); opt.step() model.eval() with torch.no_grad(): pred = model(test_x).squeeze().sigmoid() > 0.5 tp = (pred & test_y.bool()).sum().item() fp = (pred & ~test_y.bool()).sum().item() fn = (~pred & test_y.bool()).sum().item() tn = (~pred & ~test_y.bool()).sum().item() all_tp += tp; all_fp += fp; all_fn += fn; all_tn += tn prec = all_tp / max(all_tp + all_fp, 1) rec = all_tp / max(all_tp + all_fn, 1) f1 = 2 * prec * rec / max(prec + rec, 1e-9) print(f" {layers_desc:20s} ({n_params:5d} params): P={prec:.3f} R={rec:.3f} F1={f1:.3f}")