Training in progress, step 20, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:44b977ee477885a307d29621bacd3fc0e940328cfdaacfb6d47689a78a63745e
 size 75579856

 version https://git-lfs.github.com/spec/v1
+oid sha256:e5fb75a4a36553b73994dbd2353c33c1160b78c14db20d5475338392d9c4157d
 size 75579856

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df69cf2532df815e8d4be378229c606c5c64b77332e5edd954d162369652f8a1
 size 39015574

 version https://git-lfs.github.com/spec/v1
+oid sha256:6f97ab80c0e9ed284bc1431c9250044bcd13f5ab7aee843eb60252e95c35c049
 size 39015574

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:77653f901a737c32c712d4d98c59bda07889be56e9af169aa9525194aa1f203a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b432237b00d1d90d6724248db250f29e52844d78501ed7ee56e7727e8a88c36f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7b240dd9ddd423073bd07780b2cb840e4271f59c698cb81fac8efa021107c82d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:8a625473bb0a513e1727ebc85c0930216deed7d5ec1e4fd299b7895bbd369bcc
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.006916825177243645,
   "eval_steps": 500,
-  "global_step": 10,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -78,6 +78,76 @@
       "learning_rate": 1.2328767123287671e-05,
       "loss": 2.86,
       "step": 10
     }
   ],
   "logging_steps": 1,
@@ -97,7 +167,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 347472609361920.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.01383365035448729,
   "eval_steps": 500,
+  "global_step": 20,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.2328767123287671e-05,
       "loss": 2.86,
       "step": 10
+    },
+    {
+      "epoch": 0.00760850769496801,
+      "grad_norm": 0.3276166021823883,
+      "learning_rate": 1.3698630136986302e-05,
+      "loss": 2.5563,
+      "step": 11
+    },
+    {
+      "epoch": 0.008300190212692375,
+      "grad_norm": 0.6293095350265503,
+      "learning_rate": 1.5068493150684931e-05,
+      "loss": 1.6394,
+      "step": 12
+    },
+    {
+      "epoch": 0.008991872730416739,
+      "grad_norm": 0.3222309648990631,
+      "learning_rate": 1.643835616438356e-05,
+      "loss": 1.8561,
+      "step": 13
+    },
+    {
+      "epoch": 0.009683555248141103,
+      "grad_norm": 0.2540973126888275,
+      "learning_rate": 1.780821917808219e-05,
+      "loss": 3.0327,
+      "step": 14
+    },
+    {
+      "epoch": 0.010375237765865467,
+      "grad_norm": 0.2383764237165451,
+      "learning_rate": 1.9178082191780822e-05,
+      "loss": 2.2992,
+      "step": 15
+    },
+    {
+      "epoch": 0.011066920283589833,
+      "grad_norm": 0.35212621092796326,
+      "learning_rate": 2.0547945205479453e-05,
+      "loss": 2.8462,
+      "step": 16
+    },
+    {
+      "epoch": 0.011758602801314197,
+      "grad_norm": 0.23731686174869537,
+      "learning_rate": 2.1917808219178083e-05,
+      "loss": 2.8213,
+      "step": 17
+    },
+    {
+      "epoch": 0.012450285319038561,
+      "grad_norm": 0.3209351599216461,
+      "learning_rate": 2.328767123287671e-05,
+      "loss": 2.4589,
+      "step": 18
+    },
+    {
+      "epoch": 0.013141967836762926,
+      "grad_norm": 0.22553832828998566,
+      "learning_rate": 2.4657534246575342e-05,
+      "loss": 2.358,
+      "step": 19
+    },
+    {
+      "epoch": 0.01383365035448729,
+      "grad_norm": 0.29168522357940674,
+      "learning_rate": 2.6027397260273973e-05,
+      "loss": 2.5836,
+      "step": 20
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 673991251476480.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null