Training in progress, step 300, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +152 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0f7f2a15b6de18ff56263960023f120d936179ecf9ff8822fb4dbf6bf7949d0e
 size 335604696

 version https://git-lfs.github.com/spec/v1
+oid sha256:d7af36e198ceb1d302cb562bbb3dc9a7375d049e35455f558af1c5f3575bf1d7
 size 335604696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da793d08115ae91a604aac5fc0bcd54cc608654003d8c32e73f87846089873ac
 size 170920532

 version https://git-lfs.github.com/spec/v1
+oid sha256:5fe6d668006fb535878145ee06b5142e4d3416d48a3a22d19887946d60b1ed74
 size 170920532

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a1b98aafa34eb44815b8c81dd5a5c9ec149b14b0ce72824e63c08d5dafe68f81
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f5510a5ba9c2306cb1cc1948c25f245584ae6bb11cae37474b32d1e5b9d035c3
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa1e27bc8c4f6272ee858bf97369b9e68f7265e3e9a72207bbd5098643e86719
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0d93420319c4318ff13366855f16b6ec61d99b866bdf2a20293a1621b040b36f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2687623732677425,
   "eval_steps": 20,
-  "global_step": 280,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2087,6 +2087,154 @@
       "eval_samples_per_second": 3.965,
       "eval_steps_per_second": 3.965,
       "step": 280
     }
   ],
   "logging_steps": 1,
@@ -2101,12 +2249,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.132444992405504e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.28795968564400987,
   "eval_steps": 20,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 3.965,
       "eval_steps_per_second": 3.965,
       "step": 280
+    },
+    {
+      "epoch": 0.2697222388865559,
+      "grad_norm": 1.7310993671417236,
+      "learning_rate": 3.166200546578718e-06,
+      "loss": 2.5755,
+      "step": 281
+    },
+    {
+      "epoch": 0.2706821045053692,
+      "grad_norm": 1.7038969993591309,
+      "learning_rate": 2.8427160470641253e-06,
+      "loss": 2.5515,
+      "step": 282
+    },
+    {
+      "epoch": 0.2716419701241826,
+      "grad_norm": 1.7379988431930542,
+      "learning_rate": 2.5365011072835117e-06,
+      "loss": 2.624,
+      "step": 283
+    },
+    {
+      "epoch": 0.27260183574299596,
+      "grad_norm": 1.8372347354888916,
+      "learning_rate": 2.2475916629177415e-06,
+      "loss": 2.6323,
+      "step": 284
+    },
+    {
+      "epoch": 0.27356170136180935,
+      "grad_norm": 1.8558735847473145,
+      "learning_rate": 1.9760216187710787e-06,
+      "loss": 2.5233,
+      "step": 285
+    },
+    {
+      "epoch": 0.2745215669806227,
+      "grad_norm": 1.8913716077804565,
+      "learning_rate": 1.7218228447922867e-06,
+      "loss": 2.4645,
+      "step": 286
+    },
+    {
+      "epoch": 0.2754814325994361,
+      "grad_norm": 2.0636091232299805,
+      "learning_rate": 1.4850251723345196e-06,
+      "loss": 2.6357,
+      "step": 287
+    },
+    {
+      "epoch": 0.2764412982182494,
+      "grad_norm": 1.8926513195037842,
+      "learning_rate": 1.2656563906545902e-06,
+      "loss": 2.4288,
+      "step": 288
+    },
+    {
+      "epoch": 0.2774011638370628,
+      "grad_norm": 2.2932565212249756,
+      "learning_rate": 1.0637422436516274e-06,
+      "loss": 2.5419,
+      "step": 289
+    },
+    {
+      "epoch": 0.27836102945587615,
+      "grad_norm": 2.0671229362487793,
+      "learning_rate": 8.793064268460604e-07,
+      "loss": 2.6396,
+      "step": 290
+    },
+    {
+      "epoch": 0.27932089507468955,
+      "grad_norm": 2.107043743133545,
+      "learning_rate": 7.123705845987093e-07,
+      "loss": 2.6949,
+      "step": 291
+    },
+    {
+      "epoch": 0.2802807606935029,
+      "grad_norm": 2.4163978099823,
+      "learning_rate": 5.629543075708176e-07,
+      "loss": 2.6468,
+      "step": 292
+    },
+    {
+      "epoch": 0.2812406263123163,
+      "grad_norm": 2.3940374851226807,
+      "learning_rate": 4.310751304249738e-07,
+      "loss": 2.8079,
+      "step": 293
+    },
+    {
+      "epoch": 0.2822004919311296,
+      "grad_norm": 2.288954973220825,
+      "learning_rate": 3.167485297673411e-07,
+      "loss": 2.5619,
+      "step": 294
+    },
+    {
+      "epoch": 0.283160357549943,
+      "grad_norm": 2.4529731273651123,
+      "learning_rate": 2.1998792233142714e-07,
+      "loss": 2.539,
+      "step": 295
+    },
+    {
+      "epoch": 0.28412022316875635,
+      "grad_norm": 2.7025234699249268,
+      "learning_rate": 1.4080466340349316e-07,
+      "loss": 2.7581,
+      "step": 296
+    },
+    {
+      "epoch": 0.28508008878756974,
+      "grad_norm": 2.71325421333313,
+      "learning_rate": 7.92080454900701e-08,
+      "loss": 2.4698,
+      "step": 297
+    },
+    {
+      "epoch": 0.2860399544063831,
+      "grad_norm": 3.07059907913208,
+      "learning_rate": 3.5205297227380855e-08,
+      "loss": 2.6203,
+      "step": 298
+    },
+    {
+      "epoch": 0.2869998200251965,
+      "grad_norm": 3.6069223880767822,
+      "learning_rate": 8.801582533035644e-09,
+      "loss": 2.6736,
+      "step": 299
+    },
+    {
+      "epoch": 0.28795968564400987,
+      "grad_norm": 4.671467304229736,
+      "learning_rate": 0.0,
+      "loss": 2.4599,
+      "step": 300
+    },
+    {
+      "epoch": 0.28795968564400987,
+      "eval_loss": 2.5693867206573486,
+      "eval_runtime": 86.0535,
+      "eval_samples_per_second": 3.963,
+      "eval_steps_per_second": 3.963,
+      "step": 300
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.205066715365376e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null