AbstractPhil
/

geolip-constellation-core

TensorBoard

Model card Files Files and versions

xet

Metrics Training metrics Community

AbstractPhil committed on 25 days ago

Commit

e255399

verified ·

1 Parent(s): f469500

Update trainer_model.py

Browse files

Files changed (1) hide show

trainer_model.py +125 -8

trainer_model.py CHANGED Viewed

@@ -223,25 +223,110 @@ class GeoLIPCore(nn.Module):
             ld['nce'] = l_nce
             ld['nce_acc'] = nce_acc
         # CV
         l_cv = self._cv_loss(emb)
         ld['cv'] = l_cv
         # Anchor spread
-        an = F.normalize(self.constellation.anchors, dim=-1)
-        sim_a = an @ an.T
-        mask = ~torch.eye(an.shape[0], dtype=torch.bool, device=an.device)
         l_spread = F.relu(sim_a[mask]).mean()
         ld['spread'] = l_spread
         # Total
         loss = (l_ce
                 + ld.get('nce', 0.0) * 1.0
                 + l_cv * 0.01
                 + l_spread * 0.001)
         ld['total'] = loss
         return loss, ld
     def _cv_loss(self, emb, n_samples=64, n_points=5):
         B = emb.shape[0]
         if B < n_points: return torch.tensor(0.0, device=emb.device)
@@ -295,8 +380,8 @@ N_ANCHORS = 64
 N_COMP = 8
 D_COMP = 64
 BATCH = 256
-EPOCHS = 50
-LR = 3e-3
 print("=" * 60)
 print("GeoLIP Core — Conv + Constellation + Patchwork")
@@ -359,15 +444,24 @@ writer = SummaryWriter("runs/geolip_core")
 best_acc = 0.0
 gs = 0
 print(f"\n{'='*60}")
 print(f"TRAINING — {EPOCHS} epochs")
 print(f"{'='*60}")
 for epoch in range(EPOCHS):
     model.train()
     t0 = time.time()
     tot_loss, tot_ce, tot_nce, tot_cv = 0, 0, 0, 0
-    tot_acc, tot_nce_acc, n = 0, 0, 0
     correct, total = 0, 0
     pbar = tqdm(train_loader, desc=f"E{epoch+1:3d}/{EPOCHS}", unit="b")
@@ -389,11 +483,29 @@ for epoch in range(EPOCHS):
         scheduler.step()
         gs += 1
         preds = out1['logits'].argmax(-1)
         correct += (preds == targets).sum().item()
         total += targets.shape[0]
         tot_loss += loss.item()
         tot_nce_acc += ld.get('nce_acc', 0)
         n += 1
         if n % 10 == 0:
@@ -401,6 +513,8 @@ for epoch in range(EPOCHS):
                 loss=f"{tot_loss/n:.4f}",
                 acc=f"{100*correct/total:.0f}%",
                 nce=f"{tot_nce_acc/n:.2f}",
                 ordered=True)
     elapsed = time.time() - t0
@@ -449,6 +563,8 @@ for epoch in range(EPOCHS):
     writer.add_scalar("epoch/val_acc", val_acc, epoch+1)
     writer.add_scalar("epoch/val_cv", v_cv, epoch+1)
     writer.add_scalar("epoch/anchors", n_active, epoch+1)
     mk = ""
     if val_acc > best_acc:
@@ -462,11 +578,12 @@ for epoch in range(EPOCHS):
         mk = " ★"
     nce_m = tot_nce_acc / n
     cv_band = "✓" if 0.18 <= v_cv <= 0.25 else "✗"
     print(f"  E{epoch+1:3d}: train={train_acc:.1f}% val={val_acc:.1f}% "
-          f"loss={tot_loss/n:.4f} nce={nce_m:.2f} "
           f"cv={v_cv:.4f}({cv_band}) anch={n_active}/{N_ANCHORS} "
-          f"({elapsed:.0f}s){mk}")
 writer.close()
 print(f"\n  Best val accuracy: {best_acc:.1f}%")

             ld['nce'] = l_nce
             ld['nce_acc'] = nce_acc
+        # ── Anchor attraction: pull each embedding toward its nearest anchor ──
+        anchors_n = F.normalize(self.constellation.anchors, dim=-1)
+        cos_to_anchors = emb @ anchors_n.T          # (B, n_anchors)
+        nearest_cos = cos_to_anchors.max(dim=1).values  # (B,)
+        l_attract = (1.0 - nearest_cos).mean()       # 0 when on top of anchor
+        ld['attract'] = l_attract
+        ld['nearest_cos'] = nearest_cos.mean().item()
         # CV
         l_cv = self._cv_loss(emb)
         ld['cv'] = l_cv
         # Anchor spread
+        sim_a = anchors_n @ anchors_n.T
+        mask = ~torch.eye(anchors_n.shape[0], dtype=torch.bool, device=anchors_n.device)
         l_spread = F.relu(sim_a[mask]).mean()
         ld['spread'] = l_spread
         # Total
         loss = (l_ce
                 + ld.get('nce', 0.0) * 1.0
+                + l_attract * 0.5
                 + l_cv * 0.01
                 + l_spread * 0.001)
         ld['total'] = loss
         return loss, ld
+    @torch.no_grad()
+    def push_anchors_to_centroids(self, emb_buffer, label_buffer, lr=0.1):
+        """
+        Push anchors toward CLASS centroids, not nearest-anchor centroids.
+
+        Phase 1: Compute one unit-norm centroid per label present in
+                 ``label_buffer``.
+        Phase 2: Greedily assign every anchor to a class, visiting
+                 (anchor, class) pairs in order of descending cosine.
+                 A class accepts at most ``n_anchors // n_classes + 1``
+                 anchors.
+        Phase 3: Assigned anchors blend toward their class centroid
+                 with small angular offsets so they don't all collapse.
+
+        This works even when anchors start bunched at origin.
+
+        The update is written in place into ``self.constellation.anchors.data``
+        and runs under ``torch.no_grad()``, so it bypasses autograd.
+
+        Args:
+            emb_buffer: Embeddings, (N, D). They are L2-normalized along the
+                last dim here, so callers need not pre-normalize.
+            label_buffer: Labels, (N,), aligned row-for-row with ``emb_buffer``.
+            lr: Blend rate toward the target. The new anchor is
+                ``normalize(anchor + lr * (target - anchor))``.
+
+        Returns:
+            ``0`` if no centroid could be computed (e.g. an empty label
+            buffer). Otherwise the number of anchors updated, which is every
+            anchor, since the counter is incremented once per anchor.
+        """
+        anchors = self.constellation.anchors.data  # (A, D)
+        n_a = anchors.shape[0]
+        emb_n = F.normalize(emb_buffer, dim=-1)
+        device = anchors.device
+        # Phase 1: class centroids
+        classes = label_buffer.unique()
+        n_cls = classes.shape[0]
+        centroids = []
+        for c in classes:
+            mask = label_buffer == c
+            if mask.sum() > 0:
+                # Mean of unit vectors, re-normalized back onto the unit sphere.
+                centroids.append(F.normalize(emb_n[mask].mean(0, keepdim=True), dim=-1))
+        if len(centroids) == 0:
+            # Early exit also guards the `n_a // n_cls` division below.
+            return 0
+        centroids = torch.cat(centroids, dim=0)  # (C, D)
+        # Phase 2: greedily assign anchors to classes, subject to a per-class cap
+        # Sort all (anchor, class) pairs by cosine, best first, then assign
+        anchors_n = F.normalize(anchors, dim=-1)
+        cos = anchors_n @ centroids.T  # (A, C)
+        anchors_per_class = n_a // n_cls
+        # -1 marks "not yet assigned"; values index rows of `centroids`.
+        assigned_class = torch.full((n_a,), -1, dtype=torch.long, device=device)
+        class_count = torch.zeros(n_cls, dtype=torch.long, device=device)
+        # Greedy: for each anchor, assign to its best class if that class has room
+        _, flat_idx = cos.flatten().sort(descending=True)
+        for idx in flat_idx:
+            # Recover (row, col) of the flattened (A, C) cosine matrix.
+            a = (idx // n_cls).item()
+            c = (idx % n_cls).item()
+            if assigned_class[a] >= 0:
+                continue
+            if class_count[c] >= anchors_per_class + 1:  # +1 for remainder
+                continue
+            assigned_class[a] = c
+            class_count[c] += 1
+            if (assigned_class >= 0).all():
+                break
+        # Unassigned leftovers → nearest centroid
+        # NOTE(review): with the +1 cap the total capacity exceeds n_a, so this
+        # fallback looks unreachable — confirm before relying on it.
+        unassigned = (assigned_class < 0).nonzero(as_tuple=True)[0]
+        if len(unassigned) > 0:
+            leftover_cos = anchors_n[unassigned] @ centroids.T
+            assigned_class[unassigned] = leftover_cos.argmax(dim=1)
+        # Phase 3: push each anchor toward its class centroid
+        moved = 0
+        for a in range(n_a):
+            c = assigned_class[a].item()
+            target = centroids[c]
+            # Add small angular offset so co-class anchors don't collapse
+            # rank_in_class counts lower-indexed anchors sharing this class, so
+            # the first anchor of each class aims at the exact centroid.
+            rank_in_class = (assigned_class[:a] == c).sum().item()
+            if anchors_per_class > 1 and rank_in_class > 0:
+                # Tiny perpendicular perturbation
+                # The noise is freshly sampled on every call, so the offset
+                # target of a given anchor differs between pushes.
+                noise = torch.randn_like(target) * 0.05
+                noise = noise - (noise * target).sum() * target  # project out radial
+                target = F.normalize((target + noise).unsqueeze(0), dim=-1).squeeze(0)
+            # Linear blend from the pre-update normalized anchor toward the
+            # target, re-normalized; written in place into the parameter data.
+            anchors[a] = F.normalize(
+                (anchors_n[a] + lr * (target - anchors_n[a])).unsqueeze(0),
+                dim=-1).squeeze(0)
+            moved += 1
+        return moved
     def _cv_loss(self, emb, n_samples=64, n_points=5):
         B = emb.shape[0]
         if B < n_points: return torch.tensor(0.0, device=emb.device)
 N_COMP = 8
 D_COMP = 64
 BATCH = 256
+EPOCHS = 100
+LR = 3e-4
 print("=" * 60)
 print("GeoLIP Core — Conv + Constellation + Patchwork")
 best_acc = 0.0
 gs = 0
+# Anchor push config
+PUSH_INTERVAL = 50     # batches between centroid pushes
+PUSH_LR = 0.1         # blend rate toward centroid
+PUSH_BUFFER_SIZE = 5000
+emb_buffer = None      # (N, D) accumulated embeddings
+lbl_buffer = None      # (N,) accumulated labels
+push_count = 0
 print(f"\n{'='*60}")
 print(f"TRAINING — {EPOCHS} epochs")
+print(f"  Anchor push: every {PUSH_INTERVAL} batches, lr={PUSH_LR}")
 print(f"{'='*60}")
 for epoch in range(EPOCHS):
     model.train()
     t0 = time.time()
     tot_loss, tot_ce, tot_nce, tot_cv = 0, 0, 0, 0
+    tot_acc, tot_nce_acc, tot_nearest_cos, n = 0, 0, 0, 0
     correct, total = 0, 0
     pbar = tqdm(train_loader, desc=f"E{epoch+1:3d}/{EPOCHS}", unit="b")
         scheduler.step()
         gs += 1
+        # ── Accumulate embeddings for anchor push ──
+        with torch.no_grad():
+            batch_emb = out1['embedding'].detach().float()
+            if emb_buffer is None:
+                emb_buffer = batch_emb
+                lbl_buffer = targets.detach()
+            else:
+                emb_buffer = torch.cat([emb_buffer, batch_emb])[-PUSH_BUFFER_SIZE:]
+                lbl_buffer = torch.cat([lbl_buffer, targets.detach()])[-PUSH_BUFFER_SIZE:]
+        # ── Periodic anchor push toward class centroids ──
+        if gs % PUSH_INTERVAL == 0 and emb_buffer is not None and emb_buffer.shape[0] > 500:
+            moved = model.push_anchors_to_centroids(
+                emb_buffer, lbl_buffer, lr=PUSH_LR)
+            push_count += 1
+            writer.add_scalar("step/anchors_moved", moved, gs)
         preds = out1['logits'].argmax(-1)
         correct += (preds == targets).sum().item()
         total += targets.shape[0]
         tot_loss += loss.item()
         tot_nce_acc += ld.get('nce_acc', 0)
+        tot_nearest_cos += ld.get('nearest_cos', 0)
         n += 1
         if n % 10 == 0:
                 loss=f"{tot_loss/n:.4f}",
                 acc=f"{100*correct/total:.0f}%",
                 nce=f"{tot_nce_acc/n:.2f}",
+                cos=f"{ld.get('nearest_cos', 0):.3f}",
+                push=push_count,
                 ordered=True)
     elapsed = time.time() - t0
     writer.add_scalar("epoch/val_acc", val_acc, epoch+1)
     writer.add_scalar("epoch/val_cv", v_cv, epoch+1)
     writer.add_scalar("epoch/anchors", n_active, epoch+1)
+    writer.add_scalar("epoch/nearest_cos", tot_nearest_cos / n, epoch+1)
+    writer.add_scalar("epoch/push_count", push_count, epoch+1)
     mk = ""
     if val_acc > best_acc:
         mk = " ★"
     nce_m = tot_nce_acc / n
+    cos_m = tot_nearest_cos / n
     cv_band = "✓" if 0.18 <= v_cv <= 0.25 else "✗"
     print(f"  E{epoch+1:3d}: train={train_acc:.1f}% val={val_acc:.1f}% "
+          f"loss={tot_loss/n:.4f} nce={nce_m:.2f} cos={cos_m:.3f} "
           f"cv={v_cv:.4f}({cv_band}) anch={n_active}/{N_ANCHORS} "
+          f"push={push_count} ({elapsed:.0f}s){mk}")
 writer.close()
 print(f"\n  Best val accuracy: {best_acc:.1f}%")