Training in progress, step 322, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5e97d7f8deab5684b15ac25e015cf07b4873deafd40128243449e9005c3cd82
 size 147770496

 version https://git-lfs.github.com/spec/v1
+oid sha256:90be58b78feaec07f3dd645fc25daef6abb93eb2bf58d736c709779d8d9b40d1
 size 147770496

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ce8f9b29804ad47fd460f6b94823b515a18cf2d8d1ece23860cda36616e0e958
 size 75455810

 version https://git-lfs.github.com/spec/v1
+oid sha256:b9e8b71d1edb3e5fd7f93f13cf098bc846fedef444351f31f957cc9fb4d52c40
 size 75455810

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:882b179507ebc1af739e17444cb29d9c8e6428e189ae98ef0f166fb92fdff268
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3739c406ecf8641a3ed60442913b2ca4198babe9654ae86a7990d0c3f9a2542a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98ebdbbdc57822db6e58d9d3ead66e0d2c6005e998969732b2d5af225744b60b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:28ac24de07ee3317eeb7358849fb34d11c4526fd90377fc538ffac93f31faefd
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9950339721109327,
   "eval_steps": 500,
-  "global_step": 321,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2254,6 +2254,13 @@
       "learning_rate": 2.486652202848827e-10,
       "loss": 1.6111,
       "step": 321
     }
   ],
   "logging_steps": 1.0,
@@ -2268,12 +2275,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.5393388307779543e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9981337664165743,
   "eval_steps": 500,
+  "global_step": 322,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2.486652202848827e-10,
       "loss": 1.6111,
       "step": 321
+    },
+    {
+      "epoch": 0.9981337664165743,
+      "grad_norm": 0.0545884370803833,
+      "learning_rate": 0.0,
+      "loss": 1.6211,
+      "step": 322
     }
   ],
   "logging_steps": 1.0,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.547514766592221e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null