Training in progress, step 100, checkpoint

Files changed (6) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac3c5cb2e77aa09de86102669fab0a58c5fbb2fac38941de01eaa06680ecd6b5
 size 2066752

 version https://git-lfs.github.com/spec/v1
+oid sha256:40c2ecd92e57b2e66ca22969473f53cb6e090828806462e132c671e6e2d3f419
 size 2066752

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7de1afa08e042afe2b7724794e5abc6b3c2844ee734ce29154ef19d4db24a536
 size 2162798

 version https://git-lfs.github.com/spec/v1
+oid sha256:e5b74b9dc7e89f78c99f07af87ab50ab5df7a6480691241092ee5f8486f93d4d
 size 2162798

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e4789394f5e9bf3695e87ba89acf1261237e8f92d8ca463a31a9d39185674969
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:bddb8fcdae8dea7e10388731f63246ec0494e51d6d509273113166b14d83f0e3
 size 14512

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:95830d8f799e5857092ffa30bec2dad56d615ec4ba20eae91e7157878213510f
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:24238ca2ad5e52c7a2d983c253be8a0ab25f258c2785c7ba56c4bfa08fb89d84
 size 14512

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5258a10d326b43bb322f9f7412c08a5d462c4dd53d2018b15d95caea3824d34f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d429d3d3635edcf38935f000b0d1f4e5db465042c289fb4623c33dce588231ab
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 6.153846153846154,
   "eval_steps": 40,
-  "global_step": 80,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -87,6 +87,20 @@
       "eval_samples_per_second": 285.294,
       "eval_steps_per_second": 4.562,
       "step": 80
     }
   ],
   "logging_steps": 10,
@@ -106,7 +120,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 32732683960320.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 7.6923076923076925,
   "eval_steps": 40,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 285.294,
       "eval_steps_per_second": 4.562,
       "step": 80
+    },
+    {
+      "epoch": 6.923076923076923,
+      "grad_norm": 0.33203125,
+      "learning_rate": 0.00019458172417006347,
+      "loss": 10.0565,
+      "step": 90
+    },
+    {
+      "epoch": 7.6923076923076925,
+      "grad_norm": 0.341796875,
+      "learning_rate": 0.00019264940672148018,
+      "loss": 9.998,
+      "step": 100
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 40915854950400.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null