Training in progress, epoch 18, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +103 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48c4ff7c723a1e608d989b96e44d892070778ec83c952e55c57dddf3c3f48178
 size 1227009528

 version https://git-lfs.github.com/spec/v1
+oid sha256:984c4064387b5d2acb50bbb73550ab8e59284bb7304c8ae2481fd6b52ff38e6a
 size 1227009528

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d0a831ac586fb1be369d61d6f0cc41522b60258bcdd25b76e42085747f41185
 size 2454133690

 version https://git-lfs.github.com/spec/v1
+oid sha256:69dce8d19e012dd3ce3e9993c52659d81190417ccecc2e0abd73083314d194c9
 size 2454133690

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:46332136f176afc146e73eb2e78a93d7beb2c41f4d0c62f6c39855d4ba7b1979
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:50b8fc98da5c711399560346c0f91ffc2f6dec6609f8c3720258169ba848b497
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:855574b974c7e1ac9f8ed715f000f5b33d3a42b4a2102f47eec78477a0831f87
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:2b8e678712001404d086acc835dbbf2d80efd02cf92f10187d53131141ee5f90
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 34.54485321044922,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-20880",
-  "epoch": 17.0,
   "eval_steps": 500,
-  "global_step": 22185,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1690,6 +1690,105 @@
       "eval_samples_per_second": 26.464,
       "eval_steps_per_second": 3.326,
       "step": 22185
     }
   ],
   "logging_steps": 100,
@@ -1704,7 +1803,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -1718,7 +1817,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.3924285351801856e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 34.54485321044922,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-20880",
+  "epoch": 18.0,
   "eval_steps": 500,
+  "global_step": 23490,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 26.464,
       "eval_steps_per_second": 3.326,
       "step": 22185
+    },
+    {
+      "epoch": 17.011494252873565,
+      "grad_norm": 4.7622528076171875,
+      "learning_rate": 3.937260536398468e-05,
+      "loss": 34.3268,
+      "step": 22200
+    },
+    {
+      "epoch": 17.088122605363985,
+      "grad_norm": 2.9908533096313477,
+      "learning_rate": 3.9324712643678164e-05,
+      "loss": 33.4477,
+      "step": 22300
+    },
+    {
+      "epoch": 17.164750957854405,
+      "grad_norm": 2.2341110706329346,
+      "learning_rate": 3.927681992337165e-05,
+      "loss": 33.6793,
+      "step": 22400
+    },
+    {
+      "epoch": 17.24137931034483,
+      "grad_norm": 2.3946852684020996,
+      "learning_rate": 3.922892720306514e-05,
+      "loss": 33.2578,
+      "step": 22500
+    },
+    {
+      "epoch": 17.31800766283525,
+      "grad_norm": 3.3899614810943604,
+      "learning_rate": 3.9181034482758625e-05,
+      "loss": 33.2486,
+      "step": 22600
+    },
+    {
+      "epoch": 17.39463601532567,
+      "grad_norm": 5.150006294250488,
+      "learning_rate": 3.9133141762452105e-05,
+      "loss": 33.0265,
+      "step": 22700
+    },
+    {
+      "epoch": 17.47126436781609,
+      "grad_norm": 2.8135523796081543,
+      "learning_rate": 3.908524904214559e-05,
+      "loss": 33.4384,
+      "step": 22800
+    },
+    {
+      "epoch": 17.547892720306514,
+      "grad_norm": 2.5454325675964355,
+      "learning_rate": 3.903735632183908e-05,
+      "loss": 33.4139,
+      "step": 22900
+    },
+    {
+      "epoch": 17.624521072796934,
+      "grad_norm": 4.680717945098877,
+      "learning_rate": 3.8989463601532566e-05,
+      "loss": 34.0209,
+      "step": 23000
+    },
+    {
+      "epoch": 17.701149425287355,
+      "grad_norm": 4.242103099822998,
+      "learning_rate": 3.894157088122606e-05,
+      "loss": 33.1372,
+      "step": 23100
+    },
+    {
+      "epoch": 17.77777777777778,
+      "grad_norm": 2.639352798461914,
+      "learning_rate": 3.8893678160919546e-05,
+      "loss": 33.3558,
+      "step": 23200
+    },
+    {
+      "epoch": 17.8544061302682,
+      "grad_norm": 1.9746617078781128,
+      "learning_rate": 3.884578544061303e-05,
+      "loss": 33.7639,
+      "step": 23300
+    },
+    {
+      "epoch": 17.93103448275862,
+      "grad_norm": 4.005228519439697,
+      "learning_rate": 3.879837164750958e-05,
+      "loss": 33.0241,
+      "step": 23400
+    },
+    {
+      "epoch": 18.0,
+      "eval_loss": 34.649261474609375,
+      "eval_runtime": 49.2606,
+      "eval_samples_per_second": 26.492,
+      "eval_steps_per_second": 3.329,
+      "step": 23490
     }
   ],
   "logging_steps": 100,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 2.5331596254849024e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null