Training in progress, step 60, checkpoint

Files changed (6) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:791de39fd7b2bdff21473c42060283f782700709bf7b73c60d645581c8fe85a8
 size 180385008

 version https://git-lfs.github.com/spec/v1
+oid sha256:6db9d552d05fd4a02441b274102b06eb4448e823b6f11bbf626b7b2d36ec4589
 size 180385008

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4f25f5da0a81a0919880dbc8fa722b7a85800ee6cfbb9c18d56071e165ee12ff
 size 91850763

 version https://git-lfs.github.com/spec/v1
+oid sha256:a79c6a13851c6b6000b1c5fa96cc061a579e0f7b0b01508685b720f653fc9625
 size 91850763

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1f3e429ffab9361eb588f03657ab12e499db270bb2234e5408b66a7fc8b7a88
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:637ef4c9ad794a6c6701f55f48eb4bd08385ff6ecc860fcf54937e38e8bf02e2
 size 14645

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ae2ce584500d5a7dde62b4aada302acca5720fe80f8af7eb0bfd99d0917fc53
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:8c146ae1cf47c9929c1f0cc98e903ce1070f0c3ea64421f26971b053d42844b7
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3427c4310ecbb2e90c59ae2422bb29ed3ca1f49696ca5f645ad56a90b60b8cb0
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:22ae51817158590b7adfad82fb9a3380e5197063501e610f9eaa5c6decb93fd2
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.3873239436619718,
   "eval_steps": 5,
-  "global_step": 55,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -173,6 +173,21 @@
       "eval_samples_per_second": 36.927,
       "eval_steps_per_second": 9.232,
       "step": 55
     }
   ],
   "logging_steps": 5,
@@ -187,12 +202,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1169628078428160.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.4225352112676056,
   "eval_steps": 5,
+  "global_step": 60,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 36.927,
       "eval_steps_per_second": 9.232,
       "step": 55
+    },
+    {
+      "epoch": 0.4225352112676056,
+      "grad_norm": 0.31429925560951233,
+      "learning_rate": 3.636363636363636e-06,
+      "loss": 3.8459,
+      "step": 60
+    },
+    {
+      "epoch": 0.4225352112676056,
+      "eval_loss": 5.81058406829834,
+      "eval_runtime": 1.5942,
+      "eval_samples_per_second": 37.636,
+      "eval_steps_per_second": 9.409,
+      "step": 60
     }
   ],
   "logging_steps": 5,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1267677987962880.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null