Training in progress, step 320, checkpoint

Files changed (12) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b02f3bea30ba5f4000f2757ed1fa28858b738010261b54d239d104557caaaf1
 size 136062744

 version https://git-lfs.github.com/spec/v1
+oid sha256:7632ba1b08e41a9688dc652015271c5554d97ca9c7a3699508fa42877e5f157f
 size 136062744

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bf4819b116e20e0cf6edb7b9a514d0c38a1681ed9fb664e8b1387fb6e27e99ca
 size 272133812

 version https://git-lfs.github.com/spec/v1
+oid sha256:e2a4f822a3e0dfeabb1f9b45e30eea832fd24d20337f8008cdf4a3cc28b90b13
 size 272133812

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a021bf8fc1bd19adcc1376dcee299d8a04aa1a7952251bad1e317521748875b7
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:85dd38c6cdc192788a99895f3084ff7dbd3299b657e7e5a24e6ed465f9344b6a
 size 15984

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:972bd8028f20fce3573923f9a005027e0d260b48904a7835117b203b223afc00
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d4cb67e36f2b83aebca6d9cfc2016780d42e305cfdcda7c59d7e83846c493f9
 size 15984

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:346eed5082d2ec3cd3b01057d77087e12608217fe1db4e2cc48a1c635d2b350f
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:a256cde06a990790428bb39e3cafeb2de78bab76c12cd4e277082638050d6136
 size 15984

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b8617efc3472b5b019323214ba6f5ff8db4e44b4f8dae20eed03655cbbbdeade
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:3f056289e15f55441b8e468a55ec32f1a2b7873d4c0b0e58213f81abf037a634
 size 15984

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b57d29062e2b005ef9338306d164f8255bdbae7cde6979cc3c6601ddda4f3ab8
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:f8c207fa1f410f5283ce28d883ff8a114b142d07980cca571866b27f63ca8f99
 size 15984

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a7294aa3359669dd9326bd90c9c9925cb82746ef54c24e800e87f0555b79b28
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:07d4cbf9fa98068d1cc83eaeae8931895e6a693ef1347a3dcc43ea278a4fd3e5
 size 15984

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:712c430310813615c3ea651f2526c9d2f01c1c820ddb6bf6771a7983456fddf7
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:756c6f30b0c1263bcdec8435dcac3071f9f0d1b0de426bda07a81d2e513ee832
 size 15984

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b34f186dd0d24d76f54563508cd3f031ee1155efb43c5a5a9cfcc9ce2e166bce
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:31d40be02311ba43d019df6bdd117f385db008c5f96585c2f8a8e46f02a4a82c
 size 15984

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7f71fff5908e20a1b3137a87f6a31d497f79c8c2b801a7eea008ae86f7863417
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:13ba300eb50af5f07dfdf5f9c8ec3b31766e55d5d321cd8570f14309b439775d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 5.2843601895734595,
   "eval_steps": 200,
-  "global_step": 280,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -219,6 +219,34 @@
       "learning_rate": 0.00019649409730077935,
       "loss": 1.7822,
       "step": 280
     }
   ],
   "logging_steps": 10,
@@ -238,7 +266,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.810376470757376e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 6.037914691943128,
   "eval_steps": 200,
+  "global_step": 320,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00019649409730077935,
       "loss": 1.7822,
       "step": 280
+    },
+    {
+      "epoch": 5.4739336492890995,
+      "grad_norm": 6.0,
+      "learning_rate": 0.000196145982960926,
+      "loss": 1.5687,
+      "step": 290
+    },
+    {
+      "epoch": 5.6635071090047395,
+      "grad_norm": 1.8359375,
+      "learning_rate": 0.00019578173241879872,
+      "loss": 1.7941,
+      "step": 300
+    },
+    {
+      "epoch": 5.8530805687203795,
+      "grad_norm": 1.6875,
+      "learning_rate": 0.00019540140680664913,
+      "loss": 1.7871,
+      "step": 310
+    },
+    {
+      "epoch": 6.037914691943128,
+      "grad_norm": 1.4921875,
+      "learning_rate": 0.0001950050699546116,
+      "loss": 1.7118,
+      "step": 320
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 4.353739574791373e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null