Training in progress, epoch 26, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +103 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ebf6b113805e8d5c18f20cc3a7f743cea1ac029ed8f4448a7b46de82a6c516e9
 size 1227009528

 version https://git-lfs.github.com/spec/v1
+oid sha256:32e71eff0d61a84e366db8484360ee95668b8ea2394397d8e4e593ed6e3506b9
 size 1227009528

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7534969a274bb3fa8023a906c28ee9fb96fa28e85e22f56fbd7e7b549d41dd80
 size 2454133690

 version https://git-lfs.github.com/spec/v1
+oid sha256:6a7fb9eb6216192ce22490824fdf15cbc77c71775a288290e6a20a495b35fff2
 size 2454133690

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6f7a8d60a2f79f9fed2cea73d23dd3dfda5f5e479acfb4213a6f2e863cb76904
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:a30464a0259a27646e9e215f48107081e6b2f053cf9eb8cd5707cfedf93581ee
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9cbd4af8c4ab3cb75893cf7c4c12466d6c795077167416da697449ce4a12b474
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:68265dc14a2d04db70bc11c529ede2fb6197ce4bf18435eda07a0317b0f9a6b4
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 34.53865432739258,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-28710",
-  "epoch": 25.0,
   "eval_steps": 500,
-  "global_step": 32625,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2489,6 +2489,105 @@
       "eval_samples_per_second": 26.448,
       "eval_steps_per_second": 3.324,
       "step": 32625
     }
   ],
   "logging_steps": 100,
@@ -2503,7 +2602,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
@@ -2517,7 +2616,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.51827725761792e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 34.53865432739258,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-28710",
+  "epoch": 26.0,
   "eval_steps": 500,
+  "global_step": 33930,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 26.448,
       "eval_steps_per_second": 3.324,
       "step": 32625
+    },
+    {
+      "epoch": 25.057471264367816,
+      "grad_norm": 2.94795823097229,
+      "learning_rate": 3.43462643678161e-05,
+      "loss": 33.1012,
+      "step": 32700
+    },
+    {
+      "epoch": 25.134099616858236,
+      "grad_norm": 2.3455259799957275,
+      "learning_rate": 3.4298371647509584e-05,
+      "loss": 33.1345,
+      "step": 32800
+    },
+    {
+      "epoch": 25.21072796934866,
+      "grad_norm": 2.678739547729492,
+      "learning_rate": 3.4250478927203064e-05,
+      "loss": 33.2271,
+      "step": 32900
+    },
+    {
+      "epoch": 25.28735632183908,
+      "grad_norm": 4.3170952796936035,
+      "learning_rate": 3.420258620689655e-05,
+      "loss": 33.0392,
+      "step": 33000
+    },
+    {
+      "epoch": 25.3639846743295,
+      "grad_norm": 3.8895034790039062,
+      "learning_rate": 3.415469348659004e-05,
+      "loss": 33.2535,
+      "step": 33100
+    },
+    {
+      "epoch": 25.440613026819925,
+      "grad_norm": 3.693235158920288,
+      "learning_rate": 3.4106800766283525e-05,
+      "loss": 33.4471,
+      "step": 33200
+    },
+    {
+      "epoch": 25.517241379310345,
+      "grad_norm": 5.521793365478516,
+      "learning_rate": 3.405890804597701e-05,
+      "loss": 34.2142,
+      "step": 33300
+    },
+    {
+      "epoch": 25.593869731800766,
+      "grad_norm": 2.8983964920043945,
+      "learning_rate": 3.40110153256705e-05,
+      "loss": 34.362,
+      "step": 33400
+    },
+    {
+      "epoch": 25.67049808429119,
+      "grad_norm": 3.329155206680298,
+      "learning_rate": 3.396360153256705e-05,
+      "loss": 32.373,
+      "step": 33500
+    },
+    {
+      "epoch": 25.74712643678161,
+      "grad_norm": 2.6269519329071045,
+      "learning_rate": 3.391570881226054e-05,
+      "loss": 33.1401,
+      "step": 33600
+    },
+    {
+      "epoch": 25.82375478927203,
+      "grad_norm": 3.1628787517547607,
+      "learning_rate": 3.3867816091954024e-05,
+      "loss": 33.2718,
+      "step": 33700
+    },
+    {
+      "epoch": 25.900383141762454,
+      "grad_norm": 3.0653462409973145,
+      "learning_rate": 3.381992337164751e-05,
+      "loss": 33.481,
+      "step": 33800
+    },
+    {
+      "epoch": 25.977011494252874,
+      "grad_norm": 2.5874106884002686,
+      "learning_rate": 3.377250957854406e-05,
+      "loss": 33.2467,
+      "step": 33900
+    },
+    {
+      "epoch": 26.0,
+      "eval_loss": 34.54924392700195,
+      "eval_runtime": 49.3,
+      "eval_samples_per_second": 26.471,
+      "eval_steps_per_second": 3.327,
+      "step": 33930
     }
   ],
   "logging_steps": 100,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 4
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 3.659008347922637e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null