Training in progress, step 2500, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0d83f997012693855cffc76cc9f0284d03c34033bf8dfa18754e4844392487e2
 size 2611614300

 version https://git-lfs.github.com/spec/v1
+oid sha256:48475f083a5c2dd20122d335f8f1642d2b1704a4a54b55ecc266741d8af4decc
 size 2611614300

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:243605873029fbc0a2972efec25e0f01e22e5afe67e67dc13ca375cd9525c6a7
 size 5213028466

 version https://git-lfs.github.com/spec/v1
+oid sha256:fd01a2371483ae6f6e5acd61fd57cc71db67bf7044ba7e1b6b06f33fe8ab5258
 size 5213028466

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8d20f3103c38ed7048ffd97ef9a323e40cf73fc6a4fe1e50af8aeeb52d1fe2ac
 size 14942

 version https://git-lfs.github.com/spec/v1
+oid sha256:d5c2bb2c73c38e1a76a17a11eb153818930706a055ece3df18411968d5d527a9
 size 14942

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:218c032aa5091c27de2cef35285afb12a9d3cbfe80e6248c615057c7bae5ef8e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:cd7d798f6dc160785a8da7f98e23574f8db4fbcce75b7717e46f600053dad5a1
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5998488662036323,
   "eval_steps": 500,
-  "global_step": 2400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -207,6 +207,21 @@
       "learning_rate": 0.0009400149962509373,
       "loss": 972726877552.64,
       "step": 2400
     }
   ],
   "logging_steps": 100,
@@ -226,7 +241,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.667711358542807e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.624842568962117,
   "eval_steps": 500,
+  "global_step": 2500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0009400149962509373,
       "loss": 972726877552.64,
       "step": 2400
+    },
+    {
+      "epoch": 0.624842568962117,
+      "grad_norm": 0.6738520264625549,
+      "learning_rate": 0.0009375156210947263,
+      "loss": 1207760825876.48,
+      "step": 2500
+    },
+    {
+      "epoch": 0.624842568962117,
+      "eval_loss": NaN,
+      "eval_runtime": 423.912,
+      "eval_samples_per_second": 36.616,
+      "eval_steps_per_second": 36.616,
+      "step": 2500
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 4.862199331815424e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null