KD099
/

nqr-snn-framework

ml-intern

Model card Files Files and versions

xet

Community

KD099 committed on Apr 28

Commit

d2e04af

verified ·

1 Parent(s): c142c1b

v3.2: update nqr_snn/snn/train.py

Browse files

Files changed (1) hide show

nqr_snn/snn/train.py +19 -5

nqr_snn/snn/train.py CHANGED Viewed

@@ -249,13 +249,19 @@ def train_snn(model: nn.Module, train_loader, val_loader, seed: int,
             print(f"  Epoch {epoch:3d}: train_loss={train_loss:.4f} train_acc={train_acc:.4f} "
                   f"val_loss={val_loss:.4f} val_acc={val_acc:.4f} lr={current_lr:.2e} ({epoch_time:.1f}s)")
-        # Early stopping on val_loss
-        if val_loss < best_val_loss:
-            best_val_loss = val_loss
             best_val_acc = val_acc
             best_epoch = epoch
             epochs_without_improvement = 0
-            torch.save(model.state_dict(), ckpt_path)
         else:
             epochs_without_improvement += 1
             if epochs_without_improvement >= patience:
@@ -266,7 +272,15 @@ def train_snn(model: nn.Module, train_loader, val_loader, seed: int,
     df.to_csv(csv_path, index=False)
     if os.path.exists(ckpt_path):
-        model.load_state_dict(torch.load(ckpt_path, map_location=device, weights_only=True))
     print(f"  Best: val_acc={best_val_acc:.4f} at epoch {best_epoch}")
     return best_val_acc, best_epoch

             print(f"  Epoch {epoch:3d}: train_loss={train_loss:.4f} train_acc={train_acc:.4f} "
                   f"val_loss={val_loss:.4f} val_acc={val_acc:.4f} lr={current_lr:.2e} ({epoch_time:.1f}s)")
+        # v3.2: Early stopping on val_accuracy (was val_loss).
+        # At 99%+ accuracy, val_loss fluctuates while accuracy plateaus.
+        # Tracking accuracy prevents premature stopping on loss noise.
+        if val_acc > best_val_acc:
             best_val_acc = val_acc
+            best_val_loss = val_loss
             best_epoch = epoch
             epochs_without_improvement = 0
+            # v3.2: Save encoder alongside model to fix train/inference mismatch
+            ckpt = {"model_state_dict": model.state_dict()}
+            if not skip_encoder and isinstance(encoder, nn.Module):
+                ckpt["encoder_state_dict"] = encoder.state_dict()
+            torch.save(ckpt, ckpt_path)
         else:
             epochs_without_improvement += 1
             if epochs_without_improvement >= patience:
     df.to_csv(csv_path, index=False)
     if os.path.exists(ckpt_path):
+        ckpt = torch.load(ckpt_path, map_location=device, weights_only=True)
+        # v3.2: Support new dict format and legacy state_dict format
+        if isinstance(ckpt, dict) and "model_state_dict" in ckpt:
+            model.load_state_dict(ckpt["model_state_dict"])
+            if not skip_encoder and isinstance(encoder, nn.Module) and "encoder_state_dict" in ckpt:
+                encoder.load_state_dict(ckpt["encoder_state_dict"])
+        else:
+            # Legacy checkpoint: bare state_dict
+            model.load_state_dict(ckpt)
     print(f"  Best: val_acc={best_val_acc:.4f} at epoch {best_epoch}")
     return best_val_acc, best_epoch