Training in progress, epoch 1

Files changed (9) hide show

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fbe5fc7ba60f16342a2cea677db69c8ae5f868fc72eeac6a4b913d0533dcab28
 size 598898116

 version https://git-lfs.github.com/spec/v1
+oid sha256:d05da4069fc52d58f023e29df4dcdd61eef9ec8db8396ec64e56917963bbb9d9
 size 598898116

run-0/checkpoint-318/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cc5bce592f3c479083253508a74c9f5c2b63a4c297c31dfb0c1c8b284bc85f12
 size 598898116

 version https://git-lfs.github.com/spec/v1
+oid sha256:d05da4069fc52d58f023e29df4dcdd61eef9ec8db8396ec64e56917963bbb9d9
 size 598898116

run-0/checkpoint-318/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:231b4bbb1926a1cc766d9031ade587a41ad4204fd26a137b125c55927dc7a397
 size 1197886411

 version https://git-lfs.github.com/spec/v1
+oid sha256:c3017f0f9e1765dc23bae8577f1efa55de32e56690c998f0edf1e472bc741c30
 size 1197886411

run-0/checkpoint-318/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e8bef6dda512071503ea3bab68f0960919f4ba9156b465cc1853aaa448a81f7
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:d3e642f50b65cad64c4e76d928defe85e28a1873bd062929b57f3779c4adc0d3
 size 1383

run-0/checkpoint-318/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:633f7a108b7f060837a2327a4e3f8416e33eb696c13bf2067ef22cb46c181f2d
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:7f5820fad84020b09d881b4b6d6ce78d3731e06e52d3401636c1604c2a2630f2
 size 1465

run-0/checkpoint-318/trainer_state.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "best_global_step": 318,
-  "best_metric": 0.8782099185536261,
   "best_model_checkpoint": "ModernBERT-base-distilled/run-0/checkpoint-318",
   "epoch": 1.0,
   "eval_steps": 500,
@@ -11,26 +11,26 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "grad_norm": 6.882817268371582,
-      "learning_rate": 1.0031446540880504e-05,
-      "loss": 3.7921,
       "step": 318
     },
     {
       "epoch": 1.0,
-      "eval_accuracy": 0.8806451612903226,
-      "eval_f1": 0.8782099185536261,
-      "eval_loss": 2.21335768699646,
-      "eval_runtime": 19.6251,
-      "eval_samples_per_second": 157.961,
-      "eval_steps_per_second": 3.312,
       "step": 318
     }
   ],
   "logging_steps": 500,
-  "max_steps": 636,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 2,
   "save_steps": 500,
   "stateful_callbacks": {
     "EarlyStoppingCallback": {
@@ -53,12 +53,12 @@
       "attributes": {}
     }
   },
-  "total_flos": 225704119111812.0,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": {
-    "alpha": 0.2238473023876273,
-    "num_train_epochs": 2,
-    "temperature": 16
   }
 }

 {
   "best_global_step": 318,
+  "best_metric": 0.9051373620810793,
   "best_model_checkpoint": "ModernBERT-base-distilled/run-0/checkpoint-318",
   "epoch": 1.0,
   "eval_steps": 500,
   "log_history": [
     {
       "epoch": 1.0,
+      "grad_norm": 7.095359802246094,
+      "learning_rate": 1.5015723270440253e-05,
+      "loss": 4.2443,
       "step": 318
     },
     {
       "epoch": 1.0,
+      "eval_accuracy": 0.9070967741935484,
+      "eval_f1": 0.9051373620810793,
+      "eval_loss": 2.06101655960083,
+      "eval_runtime": 16.6455,
+      "eval_samples_per_second": 186.236,
+      "eval_steps_per_second": 3.905,
       "step": 318
     }
   ],
   "logging_steps": 500,
+  "max_steps": 1272,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
   "save_steps": 500,
   "stateful_callbacks": {
     "EarlyStoppingCallback": {
       "attributes": {}
     }
   },
+  "total_flos": 331330912647588.0,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": {
+    "alpha": 0.362336107066899,
+    "num_train_epochs": 4,
+    "temperature": 10
   }
 }

run-0/checkpoint-318/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c288a49e5d462511afdcb18951b1fb2d83ef9f1ece8ef0108b7a3182e94fe32
 size 5905

 version https://git-lfs.github.com/spec/v1
+oid sha256:e57990bfcffc7508b1aa41cf9c14f9a33fd6a76ae0e5b564184c21c4e65e1398
 size 5905

runs/Oct11_19-55-54_cd07aeb3aeb4/events.out.tfevents.1760213687.cd07aeb3aeb4.2948.1 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2972b1965f5ee7fd2e6c57ed4c4222c2cce42193ee44ad4ce3f8ca3f6f472d70
-size 13790

 version https://git-lfs.github.com/spec/v1
+oid sha256:f8347db70977538ed9d9e433e8a4c4278244d3788ddfb49687a6c6c6eb814aae
+size 27492

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4d1f48d83021324c19290eb8526bd37db5d8c6569e5318a75916976ce927e32
 size 5905

 version https://git-lfs.github.com/spec/v1
+oid sha256:e57990bfcffc7508b1aa41cf9c14f9a33fd6a76ae0e5b564184c21c4e65e1398
 size 5905