Training in progress, epoch 27, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +105 -6

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:32e71eff0d61a84e366db8484360ee95668b8ea2394397d8e4e593ed6e3506b9
 size 1227009528

 version https://git-lfs.github.com/spec/v1
+oid sha256:6578b9c4d75748b6280cbd87cff77c51810b19211f34f787c2c20a45bba7f69a
 size 1227009528

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a7fb9eb6216192ce22490824fdf15cbc77c71775a288290e6a20a495b35fff2
 size 2454133690

 version https://git-lfs.github.com/spec/v1
+oid sha256:ebfa5e3b95d0b2043f86b91a91689cd4bbf2940ae6a300f2f266d892b9b6aa2a
 size 2454133690

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a30464a0259a27646e9e215f48107081e6b2f053cf9eb8cd5707cfedf93581ee
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:701bb29f6de724269b66c860403f8e84b96f5ac92e54ca67ad415c85cf650a00
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:68265dc14a2d04db70bc11c529ede2fb6197ce4bf18435eda07a0317b0f9a6b4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:bf4aead81b9bf3b888621453ce55acb21aa6ea9c3c6f5c0de4c20591eb5cb4b1
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 34.53865432739258,
-  "best_model_checkpoint": "/kaggle/working/output/checkpoint-28710",
-  "epoch": 26.0,
   "eval_steps": 500,
-  "global_step": 33930,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2588,6 +2588,105 @@
       "eval_samples_per_second": 26.471,
       "eval_steps_per_second": 3.327,
       "step": 33930
     }
   ],
   "logging_steps": 100,
@@ -2602,7 +2701,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 4
       }
     },
     "TrainerControl": {
@@ -2616,7 +2715,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.659008347922637e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 34.47444152832031,
+  "best_model_checkpoint": "/kaggle/working/output/checkpoint-35235",
+  "epoch": 27.0,
   "eval_steps": 500,
+  "global_step": 35235,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 26.471,
       "eval_steps_per_second": 3.327,
       "step": 33930
+    },
+    {
+      "epoch": 26.053639846743295,
+      "grad_norm": 3.76274037361145,
+      "learning_rate": 3.372461685823755e-05,
+      "loss": 33.5457,
+      "step": 34000
+    },
+    {
+      "epoch": 26.130268199233715,
+      "grad_norm": 5.3265061378479,
+      "learning_rate": 3.3676724137931036e-05,
+      "loss": 33.2299,
+      "step": 34100
+    },
+    {
+      "epoch": 26.20689655172414,
+      "grad_norm": 4.5878987312316895,
+      "learning_rate": 3.362883141762452e-05,
+      "loss": 33.7869,
+      "step": 34200
+    },
+    {
+      "epoch": 26.28352490421456,
+      "grad_norm": 3.673882007598877,
+      "learning_rate": 3.358093869731801e-05,
+      "loss": 32.6976,
+      "step": 34300
+    },
+    {
+      "epoch": 26.36015325670498,
+      "grad_norm": 3.5689809322357178,
+      "learning_rate": 3.35330459770115e-05,
+      "loss": 33.6335,
+      "step": 34400
+    },
+    {
+      "epoch": 26.436781609195403,
+      "grad_norm": 5.735408306121826,
+      "learning_rate": 3.3485153256704984e-05,
+      "loss": 33.116,
+      "step": 34500
+    },
+    {
+      "epoch": 26.513409961685824,
+      "grad_norm": 1.9485822916030884,
+      "learning_rate": 3.343726053639847e-05,
+      "loss": 33.9434,
+      "step": 34600
+    },
+    {
+      "epoch": 26.590038314176244,
+      "grad_norm": 4.049289226531982,
+      "learning_rate": 3.338936781609196e-05,
+      "loss": 33.2275,
+      "step": 34700
+    },
+    {
+      "epoch": 26.666666666666668,
+      "grad_norm": 2.392415761947632,
+      "learning_rate": 3.334147509578544e-05,
+      "loss": 33.1124,
+      "step": 34800
+    },
+    {
+      "epoch": 26.743295019157088,
+      "grad_norm": 3.01650333404541,
+      "learning_rate": 3.3293582375478924e-05,
+      "loss": 33.0733,
+      "step": 34900
+    },
+    {
+      "epoch": 26.81992337164751,
+      "grad_norm": 3.1701831817626953,
+      "learning_rate": 3.324568965517241e-05,
+      "loss": 33.0029,
+      "step": 35000
+    },
+    {
+      "epoch": 26.896551724137932,
+      "grad_norm": 2.6294217109680176,
+      "learning_rate": 3.3197796934865905e-05,
+      "loss": 32.9881,
+      "step": 35100
+    },
+    {
+      "epoch": 26.973180076628353,
+      "grad_norm": 3.4342799186706543,
+      "learning_rate": 3.314990421455939e-05,
+      "loss": 33.047,
+      "step": 35200
+    },
+    {
+      "epoch": 27.0,
+      "eval_loss": 34.47444152832031,
+      "eval_runtime": 49.2859,
+      "eval_samples_per_second": 26.478,
+      "eval_steps_per_second": 3.328,
+      "step": 35235
     }
   ],
   "logging_steps": 100,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 3.799739438227354e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null