Training in progress, step 16000, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1343,6 +1343,10 @@ You can finetune this model on your own dataset.
 | 0.2774 | 15700 | 0.4377        |
 | 0.2783 | 15750 | 0.4041        |
 | 0.2792 | 15800 | 0.375         |
 </details>

 | 0.2774 | 15700 | 0.4377        |
 | 0.2783 | 15750 | 0.4041        |
 | 0.2792 | 15800 | 0.375         |
+| 0.2801 | 15850 | 0.3339        |
+| 0.2810 | 15900 | 0.348         |
+| 0.2818 | 15950 | 0.367         |
+| 0.2827 | 16000 | 0.3427        |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e3d55e45825a3c654f0b7eaf0ef140162fe612ee219ff9374e5295592d1444b9
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:5fd95a7943e2d7fca893eb5bdbe9b3d01703e4d80ed047d04c1a67e2b9b33397
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3a74062f2a6f56b8b0e799cc086809df8bd6dad796e1d4cdc42ce905c2701af
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:35f4813f7d86633bd1ed04417d2d1313c405edfb90b56a8c930cfc1d1cf2addb
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d06578a61ae44cb9a0de8333b92d3619c8147705d79a090a8054c07810546369
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:9f746250cbdbcfc4b3753e05fc3f2d0b47904bb4b557a3820295b22d838b42f6
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9e95ff6f2b0a235804a0c3d7872a59d6fa2f6701ff13d24c3cdd11982303ea58
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:218ae04f7dbf6951d970ca21a27b311e58e61f1cd335681a08b187b9413a828c
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:adfca4433908d3fab5c9a1fa33bf6028dd834c726d10f65fd18b1dbca2367df8
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c56b223eeb93b57918d558b5af382f71a700beb8d1cf640c27db1796abbd5d2f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.27919633864041987,
   "eval_steps": 500,
-  "global_step": 15800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2220,6 +2220,34 @@
       "learning_rate": 4.00492823624119e-05,
       "loss": 0.375,
       "step": 15800
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.28273046950928593,
   "eval_steps": 500,
+  "global_step": 16000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.00492823624119e-05,
       "loss": 0.375,
       "step": 15800
+    },
+    {
+      "epoch": 0.2800798713576364,
+      "grad_norm": 1.9363830089569092,
+      "learning_rate": 4.0000196344073355e-05,
+      "loss": 0.3339,
+      "step": 15850
+    },
+    {
+      "epoch": 0.2809634040748529,
+      "grad_norm": 2.208641767501831,
+      "learning_rate": 3.995111032573482e-05,
+      "loss": 0.348,
+      "step": 15900
+    },
+    {
+      "epoch": 0.2818469367920694,
+      "grad_norm": 1.5789657831192017,
+      "learning_rate": 3.9902024307396284e-05,
+      "loss": 0.367,
+      "step": 15950
+    },
+    {
+      "epoch": 0.28273046950928593,
+      "grad_norm": 1.6666336059570312,
+      "learning_rate": 3.985293828905775e-05,
+      "loss": 0.3427,
+      "step": 16000
     }
   ],
   "logging_steps": 50,