Update README with pruning curve and dim20, fix person detector ext4 path
Browse files
README.md
CHANGED
|
@@ -161,13 +161,16 @@ The original design leads on precision. Additional scales, adaptive boundaries,
|
|
| 161 |
|
| 162 |
Two Cofiber Threshold variants trained on full COCO 2017 train (117,266 images), 8 epochs, batch 64, AdamW lr 1e-3, cosine schedule with 3% warmup. Frozen EUPE-ViT-B backbone. Evaluated with pycocotools on the standard 5000-image val set.
|
| 163 |
|
| 164 |
-
| Variant | Box regression | Params | Nonzero | mAP@[0.5:0.95] | mAP@0.50 | mAP@0.75 |
|
| 165 |
-
|---------|---------------|--------|---------|----------------|----------|----------|
|
| 166 |
-
| linear_70k | 768β4 | 69,976 | 69,976 | 4.0 | 15.8 | 0.8 |
|
| 167 |
-
| box32_92k | 768β32β4 | 91,640 | 91,640 | 5.7 | 20.6 | 1.3 |
|
| 168 |
-
|
|
| 169 |
-
|
| 170 |
-
|
|
|
|
|
|
|
|
|
|
| 171 |
|
| 172 |
## Repository Structure
|
| 173 |
|
|
|
|
| 161 |
|
| 162 |
Two Cofiber Threshold variants trained on full COCO 2017 train (117,266 images), 8 epochs, batch 64, AdamW lr 1e-3, cosine schedule with 3% warmup. Frozen EUPE-ViT-B backbone. Evaluated with pycocotools on the standard 5000-image val set.
|
| 163 |
|
| 164 |
+
| Variant | Box regression | Params | Nonzero | mAP@[0.5:0.95] | mAP@0.50 | mAP@0.75 |
|
| 165 |
+
|---------|---------------|--------|---------|----------------|----------|----------|
|
| 166 |
+
| linear_70k | 768→4 | 69,976 | 69,976 | 4.0 | 15.8 | 0.8 |
|
| 167 |
+
| box32_92k | 768→32→4 | 91,640 | 91,640 | 5.7 | 20.6 | 1.3 |
|
| 168 |
+
| box32 pruned R1 | 768→32→4 | 91,640 | 76,640 | 5.7 | 20.7 | 1.3 |
|
| 169 |
+
| box32 pruned R2 | 768→32→4 | 91,640 | ~62,000 | **5.9** | 20.4 | **1.5** |
|
| 170 |
+
| box32 pruned R3 | 768→32→4 | 91,640 | ~47,000 | 5.1 | 17.1 | 1.4 |
|
| 171 |
+
| dim20 (training) | 768→20→16→4 | 22,076 | 22,076 | pending | — | — |
|
| 172 |
+
|
| 173 |
+
Pruning improved mAP from 5.7 to 5.9 at R2 (~62K nonzero) by removing noisy prototype weights. R3 pushed past the degradation threshold. SVD analysis of the R2 prototypes showed effective rank ~20 for 72% energy retention, motivating the dim20 variant: a 768→20 bottleneck projection followed by 20→80 classification, initialized from the SVD vectors of the pruned prototypes. All variants are the smallest detection heads to produce standard COCO mAP numbers.
|
| 174 |
|
| 175 |
## Repository Structure
|
| 176 |
|
heads/cofiber_threshold/dim20_20k/checkpoint.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:acd116819d27c1b4c8cfeece6195d6d22abe31b3382394c2af44ec509b7bf7ef
|
| 3 |
+
size 94325
|
heads/cofiber_threshold/dim20_20k/head.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Cofiber Threshold with dimension selection: 768β20β80 classification.
|
| 2 |
+
|
| 3 |
+
The bottleneck dimension K=20 was selected from SVD analysis of the pruned
|
| 4 |
+
prototype matrix, where rank 20 captures 72% of the energy. This is the
|
| 5 |
+
information bottleneck variant applied to detection: how few feature dimensions
|
| 6 |
+
does the backbone need to expose for 80-class detection?
|
| 7 |
+
|
| 8 |
+
~20K total params.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import math
|
| 12 |
+
import torch
|
| 13 |
+
import torch.nn as nn
|
| 14 |
+
import torch.nn.functional as F
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def cofiber_decompose(f, n_scales):
    """Split a (B, C, H, W) feature map into ``n_scales`` band-pass components.

    Each iteration removes a bilinear upsampling of the 2x average-pooled map
    from the current level, keeping the high-frequency residual; the final
    (coarsest) pooled map is appended as the last component.
    """
    levels = []
    current = f
    for _ in range(n_scales - 1):
        pooled = F.avg_pool2d(current, 2)
        upsampled = F.interpolate(
            pooled, size=current.shape[2:], mode="bilinear", align_corners=False
        )
        levels.append(current - upsampled)
        current = pooled
    levels.append(current)
    return levels
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class CofiberThresholdDim20(nn.Module):
    """Cofiber decomposition + 768->20 projection + 20->80 classification. ~20K params.

    Per scale: LayerNorm -> bias-free bottleneck projection, then three heads
    on the 20-d code: a linear classifier, a tiny MLP box regressor, and a
    linear centerness predictor.  Attribute names are kept stable so saved
    state dicts remain loadable.
    """

    name = "cofiber_threshold_dim20"
    needs_intermediates = False

    def __init__(self, feat_dim=768, bottleneck_dim=20, num_classes=80, n_scales=3, reg_hidden=16):
        super().__init__()
        self.n_scales = n_scales
        # One LayerNorm per cofiber scale, applied channel-last.
        self.scale_norms = nn.ModuleList(nn.LayerNorm(feat_dim) for _ in range(n_scales))
        # Bottleneck projection (bias-free; may be overwritten by SVD init).
        self.project = nn.Linear(feat_dim, bottleneck_dim, bias=False)
        # Linear classifier over the bottleneck code.
        self.cls_weight = nn.Parameter(torch.randn(num_classes, bottleneck_dim) * 0.01)
        self.cls_bias = nn.Parameter(torch.zeros(num_classes))
        # Small MLP for box regression.
        self.reg_hidden = nn.Linear(bottleneck_dim, reg_hidden)
        self.reg_act = nn.GELU()
        self.reg_out = nn.Linear(reg_hidden, 4)
        # Linear centerness predictor.
        self.ctr_weight = nn.Parameter(torch.randn(1, bottleneck_dim) * 0.01)
        self.ctr_bias = nn.Parameter(torch.zeros(1))
        # Learnable per-scale multiplier on the raw regression output.
        self.scale_params = nn.Parameter(torch.ones(n_scales))

    def forward(self, spatial, inter=None):
        """Return per-scale lists of (cls, reg, ctr) maps in NCHW layout."""
        cls_outs, reg_outs, ctr_outs = [], [], []
        for idx, feat in enumerate(cofiber_decompose(spatial, self.n_scales)):
            b, c, h, w = feat.shape
            tokens = feat.permute(0, 2, 3, 1).reshape(-1, c)
            z = self.project(self.scale_norms[idx](tokens))  # (N, bottleneck_dim)
            logits = z @ self.cls_weight.T + self.cls_bias
            cls_outs.append(logits.reshape(b, h, w, -1).permute(0, 3, 1, 2))
            hidden = self.reg_act(self.reg_hidden(z))
            raw = (self.reg_out(hidden) * self.scale_params[idx]).clamp(-10, 10)
            reg_outs.append(torch.exp(raw).reshape(b, h, w, 4).permute(0, 3, 1, 2))
            ctr = z @ self.ctr_weight.T + self.ctr_bias
            ctr_outs.append(ctr.reshape(b, h, w, 1).permute(0, 3, 1, 2))
        return cls_outs, reg_outs, ctr_outs
|
heads/cofiber_threshold/dim20_20k/svd_init.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ec7e85396c35b2e0678220d2471286a7df583d4635b2553717fd107dd80a4b5
|
| 3 |
+
size 69741
|
heads/cofiber_threshold/dim20_20k/train.py
ADDED
|
@@ -0,0 +1,375 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Train Cofiber Threshold Dim20 (22K params) on full COCO 2017 train.
|
| 3 |
+
|
| 4 |
+
Initialized from SVD of the pruned prototype matrix β the projection starts
|
| 5 |
+
from the top-20 directions the pruned prototypes identified as important.
|
| 6 |
+
|
| 7 |
+
Same hyperparameters as box32: batch 64, lr 1e-3, cosine + 3% warmup, 8 epochs.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import math
|
| 11 |
+
import os
|
| 12 |
+
import sys
|
| 13 |
+
import time
|
| 14 |
+
import json
|
| 15 |
+
|
| 16 |
+
import torch
|
| 17 |
+
import torch.nn as nn
|
| 18 |
+
import torch.nn.functional as F
|
| 19 |
+
from PIL import Image
|
| 20 |
+
from torch.utils.data import DataLoader, Dataset
|
| 21 |
+
from torchvision.transforms import v2
|
| 22 |
+
from torchvision.ops import generalized_box_iou
|
| 23 |
+
|
| 24 |
+
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 25 |
+
sys.path.insert(0, os.path.join(SCRIPT_DIR, '..', '..', '..'))
|
| 26 |
+
|
| 27 |
+
EUPE_REPO = os.environ.get("ARENA_BACKBONE_REPO", "/home/zootest/EUPE")
|
| 28 |
+
EUPE_WEIGHTS = os.environ.get("ARENA_BACKBONE_WEIGHTS", "/home/zootest/weights/eupe_vitb/EUPE-ViT-B.pt")
|
| 29 |
+
COCO_ROOT = os.environ.get("ARENA_COCO_ROOT", "/home/zootest/datasets/coco")
|
| 30 |
+
OUTPUT_DIR = SCRIPT_DIR
|
| 31 |
+
|
| 32 |
+
if EUPE_REPO not in sys.path:
|
| 33 |
+
sys.path.insert(0, EUPE_REPO)
|
| 34 |
+
|
| 35 |
+
RESOLUTION = 640
|
| 36 |
+
NUM_CLASSES = 80
|
| 37 |
+
BATCH_SIZE = 64
|
| 38 |
+
LR = 1e-3
|
| 39 |
+
WEIGHT_DECAY = 1e-4
|
| 40 |
+
EPOCHS = 8
|
| 41 |
+
GRAD_CLIP = 5.0
|
| 42 |
+
WARMUP_FRACTION = 0.03
|
| 43 |
+
|
| 44 |
+
COCO_CONTIG_TO_CAT = [
|
| 45 |
+
1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,
|
| 46 |
+
33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,
|
| 47 |
+
59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90,
|
| 48 |
+
]
|
| 49 |
+
COCO_CAT_TO_CONTIG = {cat: i for i, cat in enumerate(COCO_CONTIG_TO_CAT)}
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def letterbox(image, res):
    """Resize *image* so its longest side equals *res*, paste onto a black
    ``res x res`` canvas (top-left aligned), and return (canvas, scale)."""
    w0, h0 = image.size
    scale = res / max(h0, w0)
    target = (int(round(w0 * scale)), int(round(h0 * scale)))
    scaled = image.resize(target, Image.BILINEAR)
    canvas = Image.new("RGB", (res, res), (0, 0, 0))
    canvas.paste(scaled, (0, 0))
    return canvas, scale
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
class COCODetection(Dataset):
    """COCO 2017 detection dataset yielding letterboxed tensors and xyxy boxes.

    Keeps only non-crowd annotations whose category maps to one of the 80
    contiguous classes; drops degenerate boxes (width or height < 1 px) and
    images left with no boxes at all.
    """

    def __init__(self, root, split="train"):
        self.img_dir = os.path.join(root, f"{split}2017")
        ann_file = os.path.join(root, "annotations", f"instances_{split}2017.json")
        with open(ann_file) as fh:
            coco = json.load(fh)
        # ImageNet-style normalization after [0, 1] conversion.
        self.normalize = v2.Compose([
            v2.ToImage(), v2.ToDtype(torch.float32, scale=True),
            v2.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ])
        # Group usable annotations by image id.
        anns_by_image = {}
        for ann in coco["annotations"]:
            if ann["iscrowd"] or ann["category_id"] not in COCO_CAT_TO_CONTIG:
                continue
            anns_by_image.setdefault(ann["image_id"], []).append(ann)
        info_by_id = {rec["id"]: rec for rec in coco["images"]}
        self.items = []
        for image_id, anns in anns_by_image.items():
            info = info_by_id[image_id]
            boxes, labels = [], []
            for ann in anns:
                x, y, w, h = ann["bbox"]
                if w < 1 or h < 1:
                    continue
                boxes.append([x, y, x + w, y + h])
                labels.append(COCO_CAT_TO_CONTIG[ann["category_id"]])
            if boxes:
                self.items.append({"file": info["file_name"], "boxes": boxes, "labels": labels})
        print(f" COCO {split}: {len(self.items)} images", flush=True)

    def __len__(self):
        return len(self.items)

    def __getitem__(self, idx):
        record = self.items[idx]
        image = Image.open(os.path.join(self.img_dir, record["file"])).convert("RGB")
        canvas, scale = letterbox(image, RESOLUTION)
        tensor = self.normalize(canvas)
        # Boxes are scaled into letterboxed-image coordinates.
        boxes = torch.tensor(record["boxes"], dtype=torch.float32) * scale
        labels = torch.tensor(record["labels"], dtype=torch.long)
        return tensor, boxes, labels
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def collate_fn(batch):
    """Stack images into one tensor; keep per-image boxes/labels as ragged lists."""
    images, boxes, labels = zip(*batch)
    return torch.stack(images), list(boxes), list(labels)
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
# Inline head
|
| 114 |
+
def cofiber_decompose(f, n_scales):
    """Band-pass decomposition of a (B, C, H, W) map into ``n_scales`` levels.

    High-frequency residuals (input minus upsampled 2x average pool) come
    first; the coarsest pooled map is the last element.
    """
    parts = []
    level = f
    for _ in range(n_scales - 1):
        down = F.avg_pool2d(level, 2)
        up = F.interpolate(down, size=level.shape[2:], mode="bilinear", align_corners=False)
        parts.append(level - up)
        level = down
    parts.append(level)
    return parts
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
class CofiberThresholdDim20(nn.Module):
    """Inline dim20 head: per-scale LayerNorm -> bias-free 768->20 projection,
    with a linear classifier, tiny MLP box regressor, and linear centerness
    predictor on the bottleneck code.  Attribute names match the saved
    checkpoints, so they must not be renamed."""

    def __init__(self, feat_dim=768, bottleneck_dim=20, num_classes=80, n_scales=3, reg_hidden=16):
        super().__init__()
        self.n_scales = n_scales
        self.scale_norms = nn.ModuleList(nn.LayerNorm(feat_dim) for _ in range(n_scales))
        self.project = nn.Linear(feat_dim, bottleneck_dim, bias=False)
        self.cls_weight = nn.Parameter(torch.randn(num_classes, bottleneck_dim) * 0.01)
        self.cls_bias = nn.Parameter(torch.zeros(num_classes))
        self.reg_hidden_layer = nn.Linear(bottleneck_dim, reg_hidden)
        self.reg_act = nn.GELU()
        self.reg_out = nn.Linear(reg_hidden, 4)
        self.ctr_weight = nn.Parameter(torch.randn(1, bottleneck_dim) * 0.01)
        self.ctr_bias = nn.Parameter(torch.zeros(1))
        self.scale_params = nn.Parameter(torch.ones(n_scales))

    def forward(self, spatial):
        """Return per-scale lists of (cls, reg, ctr) NCHW maps."""
        cls_outs, reg_outs, ctr_outs = [], [], []
        for idx, feat in enumerate(cofiber_decompose(spatial, self.n_scales)):
            b, c, h, w = feat.shape
            normed = self.scale_norms[idx](feat.permute(0, 2, 3, 1).reshape(-1, c))
            z = self.project(normed)
            logits = (z @ self.cls_weight.T + self.cls_bias).reshape(b, h, w, -1)
            cls_outs.append(logits.permute(0, 3, 1, 2))
            raw = self.reg_out(self.reg_act(self.reg_hidden_layer(z))) * self.scale_params[idx]
            reg_outs.append(torch.exp(raw.clamp(-10, 10)).reshape(b, h, w, 4).permute(0, 3, 1, 2))
            ctr = (z @ self.ctr_weight.T + self.ctr_bias).reshape(b, h, w, 1)
            ctr_outs.append(ctr.permute(0, 3, 1, 2))
        return cls_outs, reg_outs, ctr_outs
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
# Inline loss (same as other scripts)
|
| 157 |
+
def make_locations(feature_sizes, strides, device):
    """Return, per pyramid level, the (x, y) image-plane centers of all cells.

    Each level yields an (H*W, 2) tensor in row-major order, with centers at
    ``(i + 0.5) * stride`` along each axis.
    """
    levels = []
    for (h, w), stride in zip(feature_sizes, strides):
        ys = (torch.arange(h, device=device, dtype=torch.float32) + 0.5) * stride
        xs = (torch.arange(w, device=device, dtype=torch.float32) + 0.5) * stride
        grid_y, grid_x = torch.meshgrid(ys, xs, indexing="ij")
        levels.append(torch.stack([grid_x.flatten(), grid_y.flatten()], dim=-1))
    return levels
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
def assign_targets(locations, boxes, labels, strides, size_ranges):
    """FCOS-style target assignment for a single image.

    For each pyramid level, a location is positive when it (a) lies strictly
    inside a box, (b) lies within a 1.5*stride window of that box's center,
    and (c) the box's max regression distance falls in the level's size range.
    Ties are broken by smallest box area.  Returns per-level lists:
    class targets (-1 = background), ltrb regression targets, centerness.
    """
    cls_t, reg_t, ctr_t = [], [], []
    # No ground truth: everything is background with zeroed regression targets.
    if boxes.numel() == 0:
        for loc in locations:
            n = loc.shape[0]
            cls_t.append(torch.full((n,), -1, dtype=torch.long, device=loc.device))
            reg_t.append(torch.zeros(n, 4, device=loc.device))
            ctr_t.append(torch.zeros(n, device=loc.device))
        return cls_t, reg_t, ctr_t
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    for loc, stride, sr in zip(locations, strides, size_ranges):
        n = loc.shape[0]
        # Signed distances from each location to each box's four sides (n, M).
        l = loc[:, None, 0] - boxes[None, :, 0]
        t = loc[:, None, 1] - boxes[None, :, 1]
        r = boxes[None, :, 2] - loc[:, None, 0]
        b = boxes[None, :, 3] - loc[:, None, 1]
        ltrb = torch.stack([l, t, r, b], dim=-1)
        # Inside the box means all four distances are positive.
        in_box = ltrb.min(dim=-1).values > 0
        cx = (boxes[:, 0] + boxes[:, 2]) / 2
        cy = (boxes[:, 1] + boxes[:, 3]) / 2
        # Center-sampling radius scales with the level's stride.
        rad = stride * 1.5
        in_center = ((loc[:, None, 0] >= cx - rad) & (loc[:, None, 0] <= cx + rad) &
                     (loc[:, None, 1] >= cy - rad) & (loc[:, None, 1] <= cy + rad))
        # Level assignment: max regression distance must fit the size range.
        max_d = ltrb.max(dim=-1).values
        in_level = (max_d >= sr[0]) & (max_d <= sr[1])
        pos = in_box & in_center & in_level
        # Ambiguous locations take the smallest-area candidate box.
        a = areas[None, :].expand_as(pos).clone()
        a[~pos] = float("inf")
        matched = a.argmin(dim=-1)
        # A location is positive only if at least one candidate had finite area.
        is_pos = a.gather(1, matched[:, None]).squeeze(1) < float("inf")
        ct = torch.full((n,), -1, dtype=torch.long, device=loc.device)
        ct[is_pos] = labels[matched[is_pos]]
        rt = torch.zeros(n, 4, device=loc.device)
        if is_pos.any():
            rt[is_pos] = ltrb[torch.arange(n, device=loc.device)[is_pos], matched[is_pos]]
        ctrt = torch.zeros(n, device=loc.device)
        if is_pos.any():
            # FCOS centerness: sqrt(min/max ratio along each axis).
            lp, tp, rp, bp = rt[is_pos].unbind(-1)
            ctrt[is_pos] = torch.sqrt(
                (torch.minimum(lp, rp) / torch.maximum(lp, rp).clamp(min=1e-6)) *
                (torch.minimum(tp, bp) / torch.maximum(tp, bp).clamp(min=1e-6)))
        cls_t.append(ct); reg_t.append(rt); ctr_t.append(ctrt)
    return cls_t, reg_t, ctr_t
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
def focal_loss(logits, targets, alpha=0.25, gamma=2.0):
    """Sigmoid focal loss (RetinaNet-style), summed over all elements."""
    prob = torch.sigmoid(logits)
    bce = F.binary_cross_entropy_with_logits(logits, targets, reduction="none")
    # Probability assigned to the true class and its alpha weighting.
    p_t = prob * targets + (1.0 - prob) * (1.0 - targets)
    alpha_t = alpha * targets + (1.0 - alpha) * (1.0 - targets)
    modulator = (1.0 - p_t) ** gamma
    return (alpha_t * modulator * bce).sum()
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
def compute_loss(cls_per, reg_per, ctr_per, locs_per, boxes_batch, labels_batch):
    """Total detection loss: focal classification + GIoU regression + centerness BCE.

    All losses are normalized by the number of positive locations (min 1).
    Strides and size ranges are hard-coded to match the 3-scale cofiber head.
    """
    B = cls_per[0].shape[0]
    device = cls_per[0].device
    num_classes = cls_per[0].shape[1]
    strides = [16, 32, 64]
    size_ranges = [(-1, 128), (128, 256), (256, float("inf"))]
    # Flatten each level's NCHW maps to (B, H*W, C) and concatenate levels.
    flat_cls, flat_reg, flat_ctr = [], [], []
    for cl, rg, ct in zip(cls_per, reg_per, ctr_per):
        b, c, h, w = cl.shape
        flat_cls.append(cl.permute(0, 2, 3, 1).reshape(b, h * w, c))
        flat_reg.append(rg.permute(0, 2, 3, 1).reshape(b, h * w, 4))
        flat_ctr.append(ct.permute(0, 2, 3, 1).reshape(b, h * w))
    pred_cls = torch.cat(flat_cls, 1)
    pred_reg = torch.cat(flat_reg, 1)
    pred_ctr = torch.cat(flat_ctr, 1)
    all_locs = torch.cat(locs_per, 0)
    # Per-image target assignment, then stack into batch tensors.
    all_ct, all_rt, all_ctt = [], [], []
    for i in range(B):
        ct, rt, ctt = assign_targets(locs_per, boxes_batch[i], labels_batch[i], strides, size_ranges)
        all_ct.append(torch.cat(ct)); all_rt.append(torch.cat(rt)); all_ctt.append(torch.cat(ctt))
    tgt_cls = torch.stack(all_ct)
    tgt_reg = torch.stack(all_rt)
    tgt_ctr = torch.stack(all_ctt)
    pos = tgt_cls >= 0
    npos = max(pos.sum().item(), 1)
    # One-hot classification targets (background rows stay all-zero).
    oh = torch.zeros_like(pred_cls)
    pi = pos.nonzero(as_tuple=True)
    oh[pi[0], pi[1], tgt_cls[pos]] = 1.0
    loss_cls = focal_loss(pred_cls.reshape(-1, num_classes), oh.reshape(-1, num_classes)) / npos
    if pos.any():
        # Decode ltrb distances back to xyxy boxes around each positive location.
        pp = pred_reg[pos]; tp = tgt_reg[pos]; pl = all_locs[None].expand(B, -1, -1)[pos]
        pb = torch.stack([pl[:,0]-pp[:,0], pl[:,1]-pp[:,1], pl[:,0]+pp[:,2], pl[:,1]+pp[:,3]], -1)
        tb = torch.stack([pl[:,0]-tp[:,0], pl[:,1]-tp[:,1], pl[:,0]+tp[:,2], pl[:,1]+tp[:,3]], -1)
        # NOTE(review): generalized_box_iou builds the full NxN matrix but only
        # the diagonal is used — O(N^2) memory for N positives; confirm N stays small.
        giou = generalized_box_iou(pb, tb)
        loss_reg = (1 - giou.diagonal()).sum() / npos
        loss_ctr = F.binary_cross_entropy_with_logits(pred_ctr[pos], tgt_ctr[pos], reduction="sum") / npos
    else:
        loss_reg = loss_ctr = torch.tensor(0.0, device=device)
    return loss_cls + loss_reg + loss_ctr
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
def train():
    """Train the dim20 head on COCO train2017 against a frozen EUPE backbone.

    Side effects: writes checkpoint.pth every 1000 steps, appends progress to
    train.log, and saves the final state dict into OUTPUT_DIR.  Requires CUDA.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    print("=" * 60)
    print("Cofiber Threshold Dim20: 22K params, SVD-initialized, 8 epochs")
    print("=" * 60, flush=True)

    # Backbone is frozen: eval mode, no gradients.
    print("\n[1/4] Loading backbone...", flush=True)
    backbone = torch.hub.load(EUPE_REPO, "eupe_vitb16", source="local", weights=EUPE_WEIGHTS)
    backbone = backbone.cuda().eval()
    for p in backbone.parameters():
        p.requires_grad = False

    print("\n[2/4] Building head with SVD initialization...", flush=True)
    head = CofiberThresholdDim20().cuda()

    # Initialize projection + classifier from SVD of the pruned prototypes,
    # falling back to the random init when the file is absent.
    svd_init_path = os.path.join(SCRIPT_DIR, "svd_init.pt")
    if os.path.isfile(svd_init_path):
        svd_init = torch.load(svd_init_path, map_location="cuda")
        head.project.weight.data = svd_init["project"]
        head.cls_weight.data = svd_init["cls_weight"]
        print(" SVD initialization loaded", flush=True)
    else:
        print(" No SVD init found, using random", flush=True)

    n_params = sum(p.numel() for p in head.parameters())
    print(f" {n_params:,} params", flush=True)

    print("\n[3/4] Loading COCO...", flush=True)
    train_ds = COCODetection(COCO_ROOT, "train")
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,
                              num_workers=4, pin_memory=True, drop_last=True, collate_fn=collate_fn)
    steps_per_epoch = len(train_loader)
    total_steps = steps_per_epoch * EPOCHS
    warmup_steps = int(total_steps * WARMUP_FRACTION)
    print(f" {len(train_ds)} images, {steps_per_epoch} steps/epoch, {total_steps} total", flush=True)

    optimizer = torch.optim.AdamW(head.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
    # Linear warmup followed by cosine decay to zero.
    def lr_lambda(step):
        if step < warmup_steps:
            return step / max(warmup_steps, 1)
        progress = (step - warmup_steps) / max(total_steps - warmup_steps, 1)
        return 0.5 * (1.0 + math.cos(math.pi * progress))
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)

    # Location grids for the three cofiber scales (strides 16/32/64).
    strides = [16, 32, 64]
    H = RESOLUTION // 16
    locs = make_locations([(H, H), (H//2, H//2), (H//4, H//4)], strides, torch.device("cuda"))

    print(f"\n[4/4] Training...", flush=True)
    # NOTE(review): log_file is opened without a context manager; it is closed
    # at the end but leaks if training raises — consider try/finally.
    log_file = open(os.path.join(OUTPUT_DIR, "train.log"), "a")
    head.train()
    global_step = 0
    running_loss = 0.0
    running_count = 0
    t0 = time.time()

    for epoch in range(EPOCHS):
        print(f"\n Epoch {epoch+1}/{EPOCHS} starting (step {global_step})", flush=True)
        for images, boxes_b, labels_b in train_loader:
            if global_step >= total_steps:
                break
            images = images.cuda(non_blocking=True)
            boxes_b = [b.cuda(non_blocking=True) for b in boxes_b]
            labels_b = [l.cuda(non_blocking=True) for l in labels_b]
            try:
                # Backbone features under bf16 autocast, no gradient.
                with torch.no_grad():
                    with torch.autocast("cuda", dtype=torch.bfloat16):
                        out = backbone.forward_features(images)
                    patches = out["x_norm_patchtokens"].float()
                # Reshape patch tokens to a square NCHW feature map.
                B, N, D = patches.shape
                h = w = int(N ** 0.5)
                spatial = patches.permute(0, 2, 1).reshape(B, D, h, w)
                cls_l, reg_l, ctr_l = head(spatial)
                loss = compute_loss(cls_l, reg_l, ctr_l, locs, boxes_b, labels_b)
                # Skip the step (but keep the schedule moving) on NaN/Inf loss.
                if torch.isnan(loss) or torch.isinf(loss):
                    print(f" WARNING: NaN/Inf loss at step {global_step}", flush=True)
                    optimizer.zero_grad(); global_step += 1; scheduler.step(); continue
                optimizer.zero_grad()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(head.parameters(), GRAD_CLIP)
                optimizer.step()
                scheduler.step()
                global_step += 1
                running_loss += loss.item()
                running_count += 1
                if global_step % 100 == 0:
                    elapsed = time.time() - t0
                    avg = running_loss / max(running_count, 1)
                    lr_now = scheduler.get_last_lr()[0]
                    msg = f"step {global_step}/{total_steps} (epoch {epoch+1}) loss={loss.item():.4f} avg={avg:.4f} lr={lr_now:.2e} {running_count/elapsed:.1f} it/s"
                    print(msg, flush=True)
                    log_file.write(msg + "\n"); log_file.flush()
                if global_step % 1000 == 0:
                    torch.save({"head": head.state_dict(), "global_step": global_step},
                               os.path.join(OUTPUT_DIR, "checkpoint.pth"))
                    print(f" Checkpoint saved at step {global_step}", flush=True)
            except Exception as e:
                import traceback
                print(f"\n ERROR at step {global_step}: {e}", flush=True)
                traceback.print_exc()
                # OOM is survivable: free the cache and skip this batch.
                if "out of memory" in str(e):
                    torch.cuda.empty_cache(); optimizer.zero_grad(); global_step += 1; scheduler.step(); continue
                raise
        print(f" Epoch {epoch+1}/{EPOCHS} complete (step {global_step})", flush=True)

    final_path = os.path.join(OUTPUT_DIR, "cofiber_threshold_dim20_coco_8ep_22k.pth")
    torch.save(head.state_dict(), final_path)
    print(f"\nSaved: {final_path}")
    print(f"Training complete: {total_steps} steps, {(time.time()-t0)/3600:.1f} hours", flush=True)
    log_file.close()


if __name__ == "__main__":
    train()
|
heads/cofiber_threshold_person/linear_9k/train.py
CHANGED
|
@@ -22,7 +22,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..', '..'))
|
|
| 22 |
|
| 23 |
EUPE_REPO = os.environ.get("ARENA_BACKBONE_REPO", "/home/zootest/EUPE")
|
| 24 |
EUPE_WEIGHTS = os.environ.get("ARENA_BACKBONE_WEIGHTS", "/home/zootest/weights/eupe_vitb/EUPE-ViT-B.pt")
|
| 25 |
-
COCO_ROOT = os.environ.get("ARENA_COCO_ROOT", "/
|
| 26 |
OUTPUT_DIR = os.path.join(os.path.dirname(__file__))
|
| 27 |
|
| 28 |
if EUPE_REPO not in sys.path:
|
|
|
|
| 22 |
|
| 23 |
EUPE_REPO = os.environ.get("ARENA_BACKBONE_REPO", "/home/zootest/EUPE")
|
| 24 |
EUPE_WEIGHTS = os.environ.get("ARENA_BACKBONE_WEIGHTS", "/home/zootest/weights/eupe_vitb/EUPE-ViT-B.pt")
|
| 25 |
+
COCO_ROOT = os.environ.get("ARENA_COCO_ROOT", "/home/zootest/datasets/coco")
|
| 26 |
OUTPUT_DIR = os.path.join(os.path.dirname(__file__))
|
| 27 |
|
| 28 |
if EUPE_REPO not in sys.path:
|