Training in progress, step 41612, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:aa5577fd1b503ec752baeafdf2de06e57a83deb0e1e57a41cda45bf9427c95e9
 size 83945296
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e723c05be219624f96a2ddaa266b6270ac90d0bba2a1e5f85e21a833ea30c99b
 size 168150738
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:45deb07b720f6c59d34e29c1d4b4f3b955196381388fc9d375a97ed58b1dc34e
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:733fb082b772b237d84bcc94add0fd700f3286fcadb7278845beeba39bafa7fe
 size 1000
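Each of the checkpoint files above is tracked through Git LFS, so the repository only stores a three-line pointer (spec version, sha256 oid, byte size) while the blob itself lives in LFS storage. As a rough sanity check, a downloaded file can be compared against its pointer. The sketch below is a minimal illustration only; the file and pointer paths in the usage comment are hypothetical.

import hashlib
import os

def parse_lfs_pointer(pointer_path):
    """Read a Git LFS pointer file into a dict of its key/value lines."""
    fields = {}
    with open(pointer_path, "r", encoding="utf-8") as fh:
        for line in fh:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

def verify_against_pointer(blob_path, pointer_path):
    """Check that a local blob matches the oid and size recorded in its pointer."""
    fields = parse_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    digest = hashlib.sha256()
    with open(blob_path, "rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            digest.update(chunk)

    return (digest.hexdigest() == expected_oid
            and os.path.getsize(blob_path) == expected_size)

# Hypothetical paths, for illustration only:
# verify_against_pointer("adapter_model.safetensors", "adapter_model.safetensors.pointer")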
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
+  "epoch": 4.0,
   "eval_steps": 4619,
-  "global_step":
+  "global_step": 41612,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -291184,6 +291184,293 @@
       "eval_test_samples_per_second": 12.589,
       "eval_test_steps_per_second": 0.787,
       "step": 41571
+    },
+    {
+      "epoch": 3.996154955301355,
+      "grad_norm": 1.3619403839111328,
+      "learning_rate": 3.00083601127463e-06,
+      "loss": 1.9584,
+      "step": 41572
+    },
+    {
+      "epoch": 3.9962510814188215,
+      "grad_norm": 1.1561639308929443,
+      "learning_rate": 3.0007947332547655e-06,
+      "loss": 1.902,
+      "step": 41573
+    },
+    {
+      "epoch": 3.996347207536288,
+      "grad_norm": 1.2157913446426392,
+      "learning_rate": 3.000754500244385e-06,
+      "loss": 1.8448,
+      "step": 41574
+    },
+    {
+      "epoch": 3.9964433336537537,
+      "grad_norm": 1.290919542312622,
+      "learning_rate": 3.000715312243752e-06,
+      "loss": 2.1262,
+      "step": 41575
+    },
+    {
+      "epoch": 3.9965394597712196,
+      "grad_norm": 1.218934416770935,
+      "learning_rate": 3.00067716925318e-06,
+      "loss": 1.875,
+      "step": 41576
+    },
+    {
+      "epoch": 3.996635585888686,
+      "grad_norm": 1.2158639430999756,
+      "learning_rate": 3.0006400712729e-06,
+      "loss": 1.8464,
+      "step": 41577
+    },
+    {
+      "epoch": 3.9967317120061523,
+      "grad_norm": 1.2294507026672363,
+      "learning_rate": 3.0006040183031917e-06,
+      "loss": 1.8543,
+      "step": 41578
+    },
+    {
+      "epoch": 3.996827838123618,
+      "grad_norm": 1.2768683433532715,
+      "learning_rate": 3.0005690103443025e-06,
+      "loss": 1.9559,
+      "step": 41579
+    },
+    {
+      "epoch": 3.996923964241084,
+      "grad_norm": 1.1993067264556885,
+      "learning_rate": 3.000535047396497e-06,
+      "loss": 1.8008,
+      "step": 41580
+    },
+    {
+      "epoch": 3.9970200903585504,
+      "grad_norm": 1.1862679719924927,
+      "learning_rate": 3.000502129459989e-06,
+      "loss": 1.7558,
+      "step": 41581
+    },
+    {
+      "epoch": 3.997116216476017,
+      "grad_norm": 1.3213326930999756,
+      "learning_rate": 3.000470256535026e-06,
+      "loss": 1.879,
+      "step": 41582
+    },
+    {
+      "epoch": 3.9972123425934827,
+      "grad_norm": 1.3456659317016602,
+      "learning_rate": 3.0004394286218213e-06,
+      "loss": 2.033,
+      "step": 41583
+    },
+    {
+      "epoch": 3.9973084687109486,
+      "grad_norm": 1.140081524848938,
+      "learning_rate": 3.0004096457205907e-06,
+      "loss": 1.8206,
+      "step": 41584
+    },
+    {
+      "epoch": 3.997404594828415,
+      "grad_norm": 1.2168374061584473,
+      "learning_rate": 3.0003809078315644e-06,
+      "loss": 1.7936,
+      "step": 41585
+    },
+    {
+      "epoch": 3.9975007209458813,
+      "grad_norm": 1.3345106840133667,
+      "learning_rate": 3.0003532149549234e-06,
+      "loss": 1.8446,
+      "step": 41586
+    },
+    {
+      "epoch": 3.997596847063347,
+      "grad_norm": 1.368496298789978,
+      "learning_rate": 3.0003265670908657e-06,
+      "loss": 1.8578,
+      "step": 41587
+    },
+    {
+      "epoch": 3.997692973180813,
+      "grad_norm": 1.1256635189056396,
+      "learning_rate": 3.00030096423959e-06,
+      "loss": 1.6587,
+      "step": 41588
+    },
+    {
+      "epoch": 3.9977890992982794,
+      "grad_norm": 1.1468359231948853,
+      "learning_rate": 3.0002764064012764e-06,
+      "loss": 1.8116,
+      "step": 41589
+    },
+    {
+      "epoch": 3.9978852254157453,
+      "grad_norm": 1.3567719459533691,
+      "learning_rate": 3.0002528935760904e-06,
+      "loss": 2.0766,
+      "step": 41590
+    },
+    {
+      "epoch": 3.9979813515332117,
+      "grad_norm": 1.2336657047271729,
+      "learning_rate": 3.0002304257641807e-06,
+      "loss": 1.8646,
+      "step": 41591
+    },
+    {
+      "epoch": 3.9980774776506776,
+      "grad_norm": 1.2067503929138184,
+      "learning_rate": 3.0002090029657445e-06,
+      "loss": 1.7902,
+      "step": 41592
+    },
+    {
+      "epoch": 3.998173603768144,
+      "grad_norm": 1.170020580291748,
+      "learning_rate": 3.0001886251808972e-06,
+      "loss": 1.8235,
+      "step": 41593
+    },
+    {
+      "epoch": 3.99826972988561,
+      "grad_norm": 1.202685832977295,
+      "learning_rate": 3.000169292409788e-06,
+      "loss": 1.8446,
+      "step": 41594
+    },
+    {
+      "epoch": 3.998365856003076,
+      "grad_norm": 1.1868846416473389,
+      "learning_rate": 3.0001510046525807e-06,
+      "loss": 1.9727,
+      "step": 41595
+    },
+    {
+      "epoch": 3.998461982120542,
+      "grad_norm": 1.2645318508148193,
+      "learning_rate": 3.0001337619093587e-06,
+      "loss": 1.8983,
+      "step": 41596
+    },
+    {
+      "epoch": 3.9985581082380084,
+      "grad_norm": 1.2477843761444092,
+      "learning_rate": 3.000117564180286e-06,
+      "loss": 1.9145,
+      "step": 41597
+    },
+    {
+      "epoch": 3.9986542343554743,
+      "grad_norm": 1.0654802322387695,
+      "learning_rate": 3.0001024114654626e-06,
+      "loss": 1.8887,
+      "step": 41598
+    },
+    {
+      "epoch": 3.9987503604729406,
+      "grad_norm": 1.2192732095718384,
+      "learning_rate": 3.000088303764986e-06,
+      "loss": 2.0114,
+      "step": 41599
+    },
+    {
+      "epoch": 3.9988464865904065,
+      "grad_norm": 1.243283987045288,
+      "learning_rate": 3.0000752410789563e-06,
+      "loss": 1.9751,
+      "step": 41600
+    },
+    {
+      "epoch": 3.998942612707873,
+      "grad_norm": 1.2228025197982788,
+      "learning_rate": 3.0000632234074715e-06,
+      "loss": 2.0104,
+      "step": 41601
+    },
+    {
+      "epoch": 3.999038738825339,
+      "grad_norm": 1.0860533714294434,
+      "learning_rate": 3.000052250750632e-06,
+      "loss": 1.5139,
+      "step": 41602
+    },
+    {
+      "epoch": 3.999134864942805,
+      "grad_norm": 1.1760326623916626,
+      "learning_rate": 3.0000423231084854e-06,
+      "loss": 1.7311,
+      "step": 41603
+    },
+    {
+      "epoch": 3.999230991060271,
+      "grad_norm": 1.3011332750320435,
+      "learning_rate": 3.0000334404811154e-06,
+      "loss": 1.8016,
+      "step": 41604
+    },
+    {
+      "epoch": 3.999327117177737,
+      "grad_norm": 1.3964388370513916,
+      "learning_rate": 3.0000256028685704e-06,
+      "loss": 1.9478,
+      "step": 41605
+    },
+    {
+      "epoch": 3.9994232432952033,
+      "grad_norm": 1.4257408380508423,
+      "learning_rate": 3.0000188102709344e-06,
+      "loss": 1.9333,
+      "step": 41606
+    },
+    {
+      "epoch": 3.9995193694126696,
+      "grad_norm": 1.2944358587265015,
+      "learning_rate": 3.0000130626882386e-06,
+      "loss": 1.9648,
+      "step": 41607
+    },
+    {
+      "epoch": 3.9996154955301355,
+      "grad_norm": 1.1968276500701904,
+      "learning_rate": 3.0000083601205175e-06,
+      "loss": 1.7864,
+      "step": 41608
+    },
+    {
+      "epoch": 3.9997116216476014,
+      "grad_norm": 1.2650930881500244,
+      "learning_rate": 3.000004702567804e-06,
+      "loss": 2.0297,
+      "step": 41609
+    },
+    {
+      "epoch": 3.9998077477650678,
+      "grad_norm": 1.3876334428787231,
+      "learning_rate": 3.0000020900301457e-06,
+      "loss": 1.9798,
+      "step": 41610
+    },
+    {
+      "epoch": 3.999903873882534,
+      "grad_norm": 1.294616460800171,
+      "learning_rate": 3.0000005225075443e-06,
+      "loss": 1.8283,
+      "step": 41611
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 1.340853214263916,
+      "learning_rate": 2.9999999999999997e-06,
+      "loss": 1.9542,
+      "step": 41612
     }
   ],
   "logging_steps": 1,
@@ -291198,12 +291485,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.
+  "total_flos": 1.0537773454858715e+19,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
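The trainer_state.json entries added in this commit follow the usual Hugging Face Trainer log schema: one dict per logging step carrying epoch, grad_norm, learning_rate, loss, and step, alongside top-level fields such as global_step and total_flos. A minimal sketch for inspecting the tail of that log is below; the local checkpoint path is assumed rather than taken from this commit.

import json

# Assumed local path to the checkpoint directory shown in this commit.
state_path = "last-checkpoint/trainer_state.json"

with open(state_path, "r", encoding="utf-8") as fh:
    state = json.load(fh)

print("global_step:", state["global_step"])   # e.g. 41612
print("epoch:", state["epoch"])               # e.g. 4.0
print("total_flos:", state.get("total_flos"))

# log_history is a list of per-step dicts; print the last few loss values.
for entry in state["log_history"][-5:]:
    if "loss" in entry:
        print(entry["step"], entry["loss"], entry["learning_rate"])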