Training in progress, epoch 2, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +33 -125
last-checkpoint/training_args.bin +1 -1

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:59f68e78e982909d4e878fb37ffc83266760fb6dae40dac940b403410ce90a97
 size 2536

 version https://git-lfs.github.com/spec/v1
+oid sha256:714661aaef8e2af2d7ec7d69e1adb728bae869801f07397b14edb899af2ee9a2
 size 2536

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:95b6047bd8cc6f4cdf7c46dea47edb8e542435510070c6cd1e0a7d9ccf5fd7da
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:435c6f28df4d46a1bb36792295b64bf8fda402d0bd8eeee291d4535762bfc591
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4933c471fb1e4ba81de00146ddd721361901645c866fd1d76662b6837ae85d16
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:87f68093f64fb3790e94fd47e9bf45be6d11c09381e54b12e7194571e6bc1ba5
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 3.0,
   "eval_steps": 500,
-  "global_step": 894,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -11,271 +11,179 @@
     {
       "epoch": 0.08389261744966443,
       "grad_norm": 0.0,
-      "learning_rate": 1.851851851851852e-05,
       "loss": 4.5636,
       "step": 25
     },
     {
       "epoch": 0.16778523489932887,
       "grad_norm": 0.0,
-      "learning_rate": 1.9965291500546865e-05,
       "loss": 4.5267,
       "step": 50
     },
     {
       "epoch": 0.2516778523489933,
       "grad_norm": 0.0,
-      "learning_rate": 1.984912443051131e-05,
       "loss": 4.6574,
       "step": 75
     },
     {
       "epoch": 0.33557046979865773,
       "grad_norm": 0.0,
-      "learning_rate": 1.965218883028299e-05,
       "loss": 4.589,
       "step": 100
     },
     {
       "epoch": 0.41946308724832215,
       "grad_norm": 0.0,
-      "learning_rate": 1.9376099685953836e-05,
       "loss": 4.6256,
       "step": 125
     },
     {
       "epoch": 0.5033557046979866,
       "grad_norm": 0.0,
-      "learning_rate": 1.9023121088565353e-05,
       "loss": 4.4971,
       "step": 150
     },
     {
       "epoch": 0.587248322147651,
       "grad_norm": 0.0,
-      "learning_rate": 1.8596147667249457e-05,
       "loss": 4.7099,
       "step": 175
     },
     {
       "epoch": 0.6711409395973155,
       "grad_norm": 0.0,
-      "learning_rate": 1.8098680851591538e-05,
       "loss": 4.619,
       "step": 200
     },
     {
       "epoch": 0.7550335570469798,
       "grad_norm": 0.0,
-      "learning_rate": 1.753480015787792e-05,
       "loss": 4.6121,
       "step": 225
     },
     {
       "epoch": 0.8389261744966443,
       "grad_norm": 0.0,
-      "learning_rate": 1.6909129734697306e-05,
       "loss": 4.4854,
       "step": 250
     },
     {
       "epoch": 0.9228187919463087,
       "grad_norm": 0.0,
-      "learning_rate": 1.6226800442241582e-05,
       "loss": 4.6153,
       "step": 275
     },
     {
       "epoch": 1.0,
       "eval_loss": 4.619876861572266,
-      "eval_runtime": 1.7691,
-      "eval_samples_per_second": 71.223,
-      "eval_steps_per_second": 9.044,
       "step": 298
     },
     {
       "epoch": 1.0067114093959733,
       "grad_norm": 0.0,
-      "learning_rate": 1.54934077762777e-05,
       "loss": 4.6071,
       "step": 300
     },
     {
       "epoch": 1.0906040268456376,
       "grad_norm": 0.0,
-      "learning_rate": 1.4714965981838503e-05,
       "loss": 4.5226,
       "step": 325
     },
     {
       "epoch": 1.174496644295302,
       "grad_norm": 0.0,
-      "learning_rate": 1.3897858732926794e-05,
       "loss": 4.4466,
       "step": 350
     },
     {
       "epoch": 1.2583892617449663,
       "grad_norm": 0.0,
-      "learning_rate": 1.3048786782687706e-05,
       "loss": 4.4967,
       "step": 375
     },
     {
       "epoch": 1.342281879194631,
       "grad_norm": 0.0,
-      "learning_rate": 1.2174713013348227e-05,
       "loss": 4.7321,
       "step": 400
     },
     {
       "epoch": 1.4261744966442953,
       "grad_norm": 0.0,
-      "learning_rate": 1.128280533654637e-05,
       "loss": 4.6957,
       "step": 425
     },
     {
       "epoch": 1.5100671140939599,
       "grad_norm": 0.0,
-      "learning_rate": 1.0380377912300231e-05,
       "loss": 4.5298,
       "step": 450
     },
     {
       "epoch": 1.5939597315436242,
       "grad_norm": 0.0,
-      "learning_rate": 9.474831168655596e-06,
       "loss": 4.359,
       "step": 475
     },
     {
       "epoch": 1.6778523489932886,
       "grad_norm": 0.0,
-      "learning_rate": 8.573591113885695e-06,
       "loss": 4.5875,
       "step": 500
     },
     {
       "epoch": 1.761744966442953,
       "grad_norm": 0.0,
-      "learning_rate": 7.684048438918247e-06,
       "loss": 4.6772,
       "step": 525
     },
     {
       "epoch": 1.8456375838926173,
       "grad_norm": 0.0,
-      "learning_rate": 6.813497909385252e-06,
       "loss": 4.5338,
       "step": 550
     },
     {
       "epoch": 1.929530201342282,
       "grad_norm": 0.0,
-      "learning_rate": 5.969078544315747e-06,
       "loss": 4.6363,
       "step": 575
     },
     {
       "epoch": 2.0,
       "eval_loss": 4.619876861572266,
-      "eval_runtime": 2.1011,
-      "eval_samples_per_second": 59.968,
-      "eval_steps_per_second": 7.615,
       "step": 596
-    },
-    {
-      "epoch": 2.0134228187919465,
-      "grad_norm": 0.0,
-      "learning_rate": 5.157715072041094e-06,
-      "loss": 4.6935,
-      "step": 600
-    },
-    {
-      "epoch": 2.097315436241611,
-      "grad_norm": 0.0,
-      "learning_rate": 4.386061143408135e-06,
-      "loss": 4.5611,
-      "step": 625
-    },
-    {
-      "epoch": 2.1812080536912752,
-      "grad_norm": 0.0,
-      "learning_rate": 3.660444767984911e-06,
-      "loss": 4.4651,
-      "step": 650
-    },
-    {
-      "epoch": 2.2651006711409396,
-      "grad_norm": 0.0,
-      "learning_rate": 2.986816420713662e-06,
-      "loss": 4.6059,
-      "step": 675
-    },
-    {
-      "epoch": 2.348993288590604,
-      "grad_norm": 0.0,
-      "learning_rate": 2.370700244566605e-06,
-      "loss": 4.4503,
-      "step": 700
-    },
-    {
-      "epoch": 2.4328859060402683,
-      "grad_norm": 0.0,
-      "learning_rate": 1.8171487493710337e-06,
-      "loss": 4.5825,
-      "step": 725
-    },
-    {
-      "epoch": 2.5167785234899327,
-      "grad_norm": 0.0,
-      "learning_rate": 1.3307013782996237e-06,
-      "loss": 4.6254,
-      "step": 750
-    },
-    {
-      "epoch": 2.600671140939597,
-      "grad_norm": 0.0,
-      "learning_rate": 9.153472818047627e-07,
-      "loss": 4.6237,
-      "step": 775
-    },
-    {
-      "epoch": 2.684563758389262,
-      "grad_norm": 0.0,
-      "learning_rate": 5.74492604272191e-07,
-      "loss": 4.4167,
-      "step": 800
-    },
-    {
-      "epoch": 2.7684563758389262,
-      "grad_norm": 0.0,
-      "learning_rate": 3.109325516623818e-07,
-      "loss": 4.6393,
-      "step": 825
-    },
-    {
-      "epoch": 2.8523489932885906,
-      "grad_norm": 0.0,
-      "learning_rate": 1.2682846920120228e-07,
-      "loss": 4.6915,
-      "step": 850
-    },
-    {
-      "epoch": 2.936241610738255,
-      "grad_norm": 0.0,
-      "learning_rate": 2.369011709604463e-08,
-      "loss": 4.7184,
-      "step": 875
-    },
-    {
-      "epoch": 3.0,
-      "eval_loss": 4.619876861572266,
-      "eval_runtime": 1.8875,
-      "eval_samples_per_second": 66.756,
-      "eval_steps_per_second": 8.477,
-      "step": 894
     }
   ],
   "logging_steps": 25,
@@ -290,12 +198,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.4434735820916736e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.0,
   "eval_steps": 500,
+  "global_step": 596,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
     {
       "epoch": 0.08389261744966443,
       "grad_norm": 0.0,
+      "learning_rate": 0.0001851851851851852,
       "loss": 4.5636,
       "step": 25
     },
     {
       "epoch": 0.16778523489932887,
       "grad_norm": 0.0,
+      "learning_rate": 0.00019965291500546864,
       "loss": 4.5267,
       "step": 50
     },
     {
       "epoch": 0.2516778523489933,
       "grad_norm": 0.0,
+      "learning_rate": 0.0001984912443051131,
       "loss": 4.6574,
       "step": 75
     },
     {
       "epoch": 0.33557046979865773,
       "grad_norm": 0.0,
+      "learning_rate": 0.0001965218883028299,
       "loss": 4.589,
       "step": 100
     },
     {
       "epoch": 0.41946308724832215,
       "grad_norm": 0.0,
+      "learning_rate": 0.00019376099685953837,
       "loss": 4.6256,
       "step": 125
     },
     {
       "epoch": 0.5033557046979866,
       "grad_norm": 0.0,
+      "learning_rate": 0.00019023121088565352,
       "loss": 4.4971,
       "step": 150
     },
     {
       "epoch": 0.587248322147651,
       "grad_norm": 0.0,
+      "learning_rate": 0.00018596147667249458,
       "loss": 4.7099,
       "step": 175
     },
     {
       "epoch": 0.6711409395973155,
       "grad_norm": 0.0,
+      "learning_rate": 0.00018098680851591536,
       "loss": 4.619,
       "step": 200
     },
     {
       "epoch": 0.7550335570469798,
       "grad_norm": 0.0,
+      "learning_rate": 0.00017534800157877918,
       "loss": 4.6121,
       "step": 225
     },
     {
       "epoch": 0.8389261744966443,
       "grad_norm": 0.0,
+      "learning_rate": 0.00016909129734697306,
       "loss": 4.4854,
       "step": 250
     },
     {
       "epoch": 0.9228187919463087,
       "grad_norm": 0.0,
+      "learning_rate": 0.0001622680044224158,
       "loss": 4.6153,
       "step": 275
     },
     {
       "epoch": 1.0,
       "eval_loss": 4.619876861572266,
+      "eval_runtime": 1.7786,
+      "eval_samples_per_second": 70.844,
+      "eval_steps_per_second": 8.996,
       "step": 298
     },
     {
       "epoch": 1.0067114093959733,
       "grad_norm": 0.0,
+      "learning_rate": 0.00015493407776277698,
       "loss": 4.6071,
       "step": 300
     },
     {
       "epoch": 1.0906040268456376,
       "grad_norm": 0.0,
+      "learning_rate": 0.00014714965981838503,
       "loss": 4.5226,
       "step": 325
     },
     {
       "epoch": 1.174496644295302,
       "grad_norm": 0.0,
+      "learning_rate": 0.00013897858732926793,
       "loss": 4.4466,
       "step": 350
     },
     {
       "epoch": 1.2583892617449663,
       "grad_norm": 0.0,
+      "learning_rate": 0.00013048786782687705,
       "loss": 4.4967,
       "step": 375
     },
     {
       "epoch": 1.342281879194631,
       "grad_norm": 0.0,
+      "learning_rate": 0.00012174713013348226,
       "loss": 4.7321,
       "step": 400
     },
     {
       "epoch": 1.4261744966442953,
       "grad_norm": 0.0,
+      "learning_rate": 0.0001128280533654637,
       "loss": 4.6957,
       "step": 425
     },
     {
       "epoch": 1.5100671140939599,
       "grad_norm": 0.0,
+      "learning_rate": 0.0001038037791230023,
       "loss": 4.5298,
       "step": 450
     },
     {
       "epoch": 1.5939597315436242,
       "grad_norm": 0.0,
+      "learning_rate": 9.474831168655595e-05,
       "loss": 4.359,
       "step": 475
     },
     {
       "epoch": 1.6778523489932886,
       "grad_norm": 0.0,
+      "learning_rate": 8.573591113885694e-05,
       "loss": 4.5875,
       "step": 500
     },
     {
       "epoch": 1.761744966442953,
       "grad_norm": 0.0,
+      "learning_rate": 7.684048438918248e-05,
       "loss": 4.6772,
       "step": 525
     },
     {
       "epoch": 1.8456375838926173,
       "grad_norm": 0.0,
+      "learning_rate": 6.813497909385251e-05,
       "loss": 4.5338,
       "step": 550
     },
     {
       "epoch": 1.929530201342282,
       "grad_norm": 0.0,
+      "learning_rate": 5.9690785443157474e-05,
       "loss": 4.6363,
       "step": 575
     },
     {
       "epoch": 2.0,
       "eval_loss": 4.619876861572266,
+      "eval_runtime": 1.8399,
+      "eval_samples_per_second": 68.481,
+      "eval_steps_per_second": 8.696,
       "step": 596
     }
   ],
   "logging_steps": 25,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": false
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.298711528064819e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b6697f2864903978d31dd6ca1fe39ca44ced565a75f65d198b42ce2ce420093a
 size 5368

 version https://git-lfs.github.com/spec/v1
+oid sha256:857c3d3b90faeb2a01802e5fabcb10a4e6cd4564a0b43dbff661f690802b0ddb
 size 5368