Training in progress, step 167, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +299 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d9d39744d7d6dcdc045382bac949896cdd56c3a51acb3382c1ff30184fb9b54
 size 83945296

 version https://git-lfs.github.com/spec/v1
+oid sha256:63f0232e7b506fef7d2e12841b36fc5ab0b51d545c78f674b38b70d223ec68fe
 size 83945296

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3678af00249a9bc4835e5ac12d301b949f9bb1ee506b7f7fb1ce32b86be1bd77
 size 43122580

 version https://git-lfs.github.com/spec/v1
+oid sha256:176f10ff9a1721647deff2714511e6fb56ada5350eb89f6a8a6cc2d1ebf6a1c6
 size 43122580

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5b33506a82a92e59a5bc28b8ff979fe9268fe5f28fc8ffdedd6dfba1330cfc6
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:4740969335d0ca1e5851b9a3ee510297bb49177fbaa8e72e5119db37858a218d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fbd7bd812f49d7c127aa907e5f80ff84e361879563f5604f92118217b0128fd9
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a92317938583156fc0a1371ce3c4446db203520cf84ded2f12700846a207afcb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7578947368421053,
   "eval_steps": 500,
-  "global_step": 126,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -889,6 +889,301 @@
       "learning_rate": 1.4989132611641576e-05,
       "loss": 1.2182,
       "step": 126
     }
   ],
   "logging_steps": 1,
@@ -903,12 +1198,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 9.340299453136896e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0045112781954888,
   "eval_steps": 500,
+  "global_step": 167,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.4989132611641576e-05,
       "loss": 1.2182,
       "step": 126
+    },
+    {
+      "epoch": 0.7639097744360902,
+      "grad_norm": 0.864700436592102,
+      "learning_rate": 1.4303513272105057e-05,
+      "loss": 1.3224,
+      "step": 127
+    },
+    {
+      "epoch": 0.7699248120300752,
+      "grad_norm": 0.8970008492469788,
+      "learning_rate": 1.3631317921347563e-05,
+      "loss": 1.3282,
+      "step": 128
+    },
+    {
+      "epoch": 0.7759398496240602,
+      "grad_norm": 0.9462831020355225,
+      "learning_rate": 1.297279934454978e-05,
+      "loss": 1.3372,
+      "step": 129
+    },
+    {
+      "epoch": 0.7819548872180451,
+      "grad_norm": 0.8102074861526489,
+      "learning_rate": 1.2328205183616965e-05,
+      "loss": 1.1941,
+      "step": 130
+    },
+    {
+      "epoch": 0.7879699248120301,
+      "grad_norm": 0.8798099756240845,
+      "learning_rate": 1.1697777844051105e-05,
+      "loss": 1.3555,
+      "step": 131
+    },
+    {
+      "epoch": 0.793984962406015,
+      "grad_norm": 0.9303496479988098,
+      "learning_rate": 1.1081754403791999e-05,
+      "loss": 1.1306,
+      "step": 132
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.058530569076538,
+      "learning_rate": 1.0480366524062042e-05,
+      "loss": 1.5215,
+      "step": 133
+    },
+    {
+      "epoch": 0.806015037593985,
+      "grad_norm": 0.9745376110076904,
+      "learning_rate": 9.893840362247809e-06,
+      "loss": 1.5797,
+      "step": 134
+    },
+    {
+      "epoch": 0.8120300751879699,
+      "grad_norm": 0.9844892621040344,
+      "learning_rate": 9.322396486851626e-06,
+      "loss": 1.5185,
+      "step": 135
+    },
+    {
+      "epoch": 0.8180451127819549,
+      "grad_norm": 1.0114489793777466,
+      "learning_rate": 8.766249794544662e-06,
+      "loss": 1.44,
+      "step": 136
+    },
+    {
+      "epoch": 0.8240601503759398,
+      "grad_norm": 1.1200000047683716,
+      "learning_rate": 8.225609429353187e-06,
+      "loss": 1.3672,
+      "step": 137
+    },
+    {
+      "epoch": 0.8300751879699249,
+      "grad_norm": 1.17137610912323,
+      "learning_rate": 7.700678704007947e-06,
+      "loss": 1.5414,
+      "step": 138
+    },
+    {
+      "epoch": 0.8360902255639098,
+      "grad_norm": 1.447938084602356,
+      "learning_rate": 7.191655023486682e-06,
+      "loss": 1.561,
+      "step": 139
+    },
+    {
+      "epoch": 0.8421052631578947,
+      "grad_norm": 1.2338807582855225,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 1.3352,
+      "step": 140
+    },
+    {
+      "epoch": 0.8481203007518797,
+      "grad_norm": 1.2002118825912476,
+      "learning_rate": 6.222088434895462e-06,
+      "loss": 1.4649,
+      "step": 141
+    },
+    {
+      "epoch": 0.8541353383458646,
+      "grad_norm": 1.2546420097351074,
+      "learning_rate": 5.7619101411671095e-06,
+      "loss": 1.3337,
+      "step": 142
+    },
+    {
+      "epoch": 0.8601503759398497,
+      "grad_norm": 1.2262368202209473,
+      "learning_rate": 5.318367983829392e-06,
+      "loss": 1.395,
+      "step": 143
+    },
+    {
+      "epoch": 0.8661654135338346,
+      "grad_norm": 1.2686101198196411,
+      "learning_rate": 4.891628760948114e-06,
+      "loss": 1.527,
+      "step": 144
+    },
+    {
+      "epoch": 0.8721804511278195,
+      "grad_norm": 1.2336403131484985,
+      "learning_rate": 4.4818529516926726e-06,
+      "loss": 1.4527,
+      "step": 145
+    },
+    {
+      "epoch": 0.8781954887218045,
+      "grad_norm": 1.3499011993408203,
+      "learning_rate": 4.089194655986306e-06,
+      "loss": 1.3562,
+      "step": 146
+    },
+    {
+      "epoch": 0.8842105263157894,
+      "grad_norm": 1.4406931400299072,
+      "learning_rate": 3.7138015365554833e-06,
+      "loss": 1.4555,
+      "step": 147
+    },
+    {
+      "epoch": 0.8902255639097745,
+      "grad_norm": 1.434403419494629,
+      "learning_rate": 3.3558147633999728e-06,
+      "loss": 1.4964,
+      "step": 148
+    },
+    {
+      "epoch": 0.8962406015037594,
+      "grad_norm": 1.4196852445602417,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 1.3747,
+      "step": 149
+    },
+    {
+      "epoch": 0.9022556390977443,
+      "grad_norm": 1.5109477043151855,
+      "learning_rate": 2.692592156212487e-06,
+      "loss": 1.4056,
+      "step": 150
+    },
+    {
+      "epoch": 0.9082706766917293,
+      "grad_norm": 1.2970283031463623,
+      "learning_rate": 2.3876057330792346e-06,
+      "loss": 1.3722,
+      "step": 151
+    },
+    {
+      "epoch": 0.9142857142857143,
+      "grad_norm": 1.3502446413040161,
+      "learning_rate": 2.100524384225555e-06,
+      "loss": 1.5935,
+      "step": 152
+    },
+    {
+      "epoch": 0.9203007518796993,
+      "grad_norm": 1.3725895881652832,
+      "learning_rate": 1.8314560692059835e-06,
+      "loss": 1.4272,
+      "step": 153
+    },
+    {
+      "epoch": 0.9263157894736842,
+      "grad_norm": 1.4336990118026733,
+      "learning_rate": 1.5805019736097104e-06,
+      "loss": 1.4357,
+      "step": 154
+    },
+    {
+      "epoch": 0.9323308270676691,
+      "grad_norm": 1.3459137678146362,
+      "learning_rate": 1.3477564710088098e-06,
+      "loss": 1.4523,
+      "step": 155
+    },
+    {
+      "epoch": 0.9383458646616541,
+      "grad_norm": 1.516906499862671,
+      "learning_rate": 1.1333070874682216e-06,
+      "loss": 1.4304,
+      "step": 156
+    },
+    {
+      "epoch": 0.9443609022556391,
+      "grad_norm": 1.4473116397857666,
+      "learning_rate": 9.372344686307655e-07,
+      "loss": 1.4226,
+      "step": 157
+    },
+    {
+      "epoch": 0.9503759398496241,
+      "grad_norm": 1.4904662370681763,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 1.5851,
+      "step": 158
+    },
+    {
+      "epoch": 0.956390977443609,
+      "grad_norm": 1.6148744821548462,
+      "learning_rate": 6.005075261595494e-07,
+      "loss": 1.6208,
+      "step": 159
+    },
+    {
+      "epoch": 0.9624060150375939,
+      "grad_norm": 1.755981683731079,
+      "learning_rate": 4.5997983175773417e-07,
+      "loss": 1.4813,
+      "step": 160
+    },
+    {
+      "epoch": 0.968421052631579,
+      "grad_norm": 1.6025625467300415,
+      "learning_rate": 3.380821129028489e-07,
+      "loss": 1.479,
+      "step": 161
+    },
+    {
+      "epoch": 0.9744360902255639,
+      "grad_norm": 2.444580316543579,
+      "learning_rate": 2.3486021034170857e-07,
+      "loss": 1.7314,
+      "step": 162
+    },
+    {
+      "epoch": 0.9804511278195489,
+      "grad_norm": 6.058820724487305,
+      "learning_rate": 1.503529416103988e-07,
+      "loss": 2.5463,
+      "step": 163
+    },
+    {
+      "epoch": 0.9864661654135338,
+      "grad_norm": 6.485790729522705,
+      "learning_rate": 8.459208643659122e-08,
+      "loss": 3.2891,
+      "step": 164
+    },
+    {
+      "epoch": 0.9924812030075187,
+      "grad_norm": 0.9536194801330566,
+      "learning_rate": 3.760237478849793e-08,
+      "loss": 1.2774,
+      "step": 165
+    },
+    {
+      "epoch": 0.9984962406015038,
+      "grad_norm": 1.3168344497680664,
+      "learning_rate": 9.401477574932926e-09,
+      "loss": 1.4629,
+      "step": 166
+    },
+    {
+      "epoch": 0.9984962406015038,
+      "eval_loss": 1.425169587135315,
+      "eval_runtime": 7.4236,
+      "eval_samples_per_second": 18.859,
+      "eval_steps_per_second": 4.715,
+      "step": 166
+    },
+    {
+      "epoch": 1.0045112781954888,
+      "grad_norm": 4.509108543395996,
+      "learning_rate": 0.0,
+      "loss": 3.1146,
+      "step": 167
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.2392080462577664e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null