Training in progress, step 3400, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b79f31453e9489a3b25473dd16de181bf069ee2771bf0473ece225eab902297d
 size 471641972

 version https://git-lfs.github.com/spec/v1
+oid sha256:48393895744f0281dcc35ce037f939cbf0fcb9343f0e49a9b5a5800b72bf3aec
 size 471641972

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:39a3235f25c4792928cfbbcc63e700e273fa3766c9b71ff7b09b54d36a41d5f5
 size 943405434

 version https://git-lfs.github.com/spec/v1
+oid sha256:5927d7bb5b738cb043c868545306c06435eb54da996000898577927f1d970803
 size 943405434

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2fa4bd9692d07369beb5f2f11061992a323c35c7234002085db824618be90174
-size 14180

 version https://git-lfs.github.com/spec/v1
+oid sha256:81805f5d9be60e795c66f48836696944f5979ee35820fd897c90082596563348
+size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:300da17205244156f64f5ed42fd5c64dd46e3af5e1f414de2c4903ba75da856f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a7fa458e54f1f79beba7dd8cf425c5f35ff1c5b914a484c940ab3f4fb17abf3b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.08059641345960104,
   "eval_steps": 1000,
-  "global_step": 3200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -456,6 +456,34 @@
       "learning_rate": 4.597143864598026e-05,
       "loss": 3.8954,
       "step": 3200
     }
   ],
   "logging_steps": 50,
@@ -475,7 +503,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1316360630797248.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.08563368930082611,
   "eval_steps": 1000,
+  "global_step": 3400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.597143864598026e-05,
       "loss": 3.8954,
       "step": 3200
+    },
+    {
+      "epoch": 0.08185573241990732,
+      "grad_norm": 7.927274227142334,
+      "learning_rate": 4.5908472697964946e-05,
+      "loss": 3.9952,
+      "step": 3250
+    },
+    {
+      "epoch": 0.08311505138021358,
+      "grad_norm": 9.124794006347656,
+      "learning_rate": 4.584550674994963e-05,
+      "loss": 3.8936,
+      "step": 3300
+    },
+    {
+      "epoch": 0.08437437034051985,
+      "grad_norm": 8.415815353393555,
+      "learning_rate": 4.578254080193432e-05,
+      "loss": 3.9416,
+      "step": 3350
+    },
+    {
+      "epoch": 0.08563368930082611,
+      "grad_norm": 7.427456378936768,
+      "learning_rate": 4.5719574853919e-05,
+      "loss": 3.8502,
+      "step": 3400
     }
   ],
   "logging_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 1399136881240896.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null