Training in progress, epoch 20, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +110 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ca9c0fa45ca737106eab6be7425caef617a753946ed0508c1aef1f3a4291004d
 size 1227009528

 version https://git-lfs.github.com/spec/v1
+oid sha256:6f875ff0bfa8c3f03718200317018a9c1320ef659ed8be49eb8d1545f90dca2b
 size 1227009528

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:acbe2b1c3985ee1c5128707c924ffa1b789c4d55f3b36a9e00d5043900ec85eb
 size 2454133690

 version https://git-lfs.github.com/spec/v1
+oid sha256:4edde3d63fc51cb76d2b8798e35123bee17f10b37ac6770074f24fbb1849dc32
 size 2454133690

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f0baebfc08807f25c4a6326e1681bcfbdd8c24e4c42d43ef5df074269e679b0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:5b0066a3b21610aa70bfcf0b5c4ca5da7f43ab12c9e601ab15813e745474a36d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0854ef6140ae5c2ba277188e63d5e8a4b0a0fea517aba028586326cadbf26b4b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d8c5b80067b711daea816f97793263fb6b6d08534034a3999a4ce7590fa85de8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 34.54485321044922,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-20880",
-  "epoch": 19.0,
   "eval_steps": 500,
-  "global_step": 24795,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1888,6 +1888,112 @@
       "eval_samples_per_second": 26.464,
       "eval_steps_per_second": 3.326,
       "step": 24795
     }
   ],
   "logging_steps": 100,
@@ -1902,7 +2008,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
@@ -1916,7 +2022,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.673890715789619e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 34.54485321044922,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-20880",
+  "epoch": 20.0,
   "eval_steps": 500,
+  "global_step": 26100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 26.464,
       "eval_steps_per_second": 3.326,
       "step": 24795
+    },
+    {
+      "epoch": 19.00383141762452,
+      "grad_norm": 3.04927659034729,
+      "learning_rate": 3.8127873563218394e-05,
+      "loss": 33.7055,
+      "step": 24800
+    },
+    {
+      "epoch": 19.080459770114942,
+      "grad_norm": 2.725443124771118,
+      "learning_rate": 3.8079980842911874e-05,
+      "loss": 33.5355,
+      "step": 24900
+    },
+    {
+      "epoch": 19.157088122605366,
+      "grad_norm": 3.853895425796509,
+      "learning_rate": 3.803208812260536e-05,
+      "loss": 33.5267,
+      "step": 25000
+    },
+    {
+      "epoch": 19.233716475095786,
+      "grad_norm": 2.666419267654419,
+      "learning_rate": 3.798419540229885e-05,
+      "loss": 33.4069,
+      "step": 25100
+    },
+    {
+      "epoch": 19.310344827586206,
+      "grad_norm": 3.5618317127227783,
+      "learning_rate": 3.793630268199234e-05,
+      "loss": 33.7295,
+      "step": 25200
+    },
+    {
+      "epoch": 19.386973180076627,
+      "grad_norm": 3.351062297821045,
+      "learning_rate": 3.788840996168583e-05,
+      "loss": 33.1994,
+      "step": 25300
+    },
+    {
+      "epoch": 19.46360153256705,
+      "grad_norm": 3.3226547241210938,
+      "learning_rate": 3.7840996168582374e-05,
+      "loss": 33.3149,
+      "step": 25400
+    },
+    {
+      "epoch": 19.54022988505747,
+      "grad_norm": 4.15867805480957,
+      "learning_rate": 3.779310344827586e-05,
+      "loss": 33.5592,
+      "step": 25500
+    },
+    {
+      "epoch": 19.61685823754789,
+      "grad_norm": 2.333674430847168,
+      "learning_rate": 3.774521072796935e-05,
+      "loss": 33.7336,
+      "step": 25600
+    },
+    {
+      "epoch": 19.693486590038315,
+      "grad_norm": 2.9516782760620117,
+      "learning_rate": 3.7697318007662834e-05,
+      "loss": 33.3228,
+      "step": 25700
+    },
+    {
+      "epoch": 19.770114942528735,
+      "grad_norm": 1.734508991241455,
+      "learning_rate": 3.764942528735632e-05,
+      "loss": 33.3216,
+      "step": 25800
+    },
+    {
+      "epoch": 19.846743295019156,
+      "grad_norm": 2.4886648654937744,
+      "learning_rate": 3.760153256704981e-05,
+      "loss": 33.5157,
+      "step": 25900
+    },
+    {
+      "epoch": 19.92337164750958,
+      "grad_norm": 3.6624252796173096,
+      "learning_rate": 3.75536398467433e-05,
+      "loss": 33.2399,
+      "step": 26000
+    },
+    {
+      "epoch": 20.0,
+      "grad_norm": 4.810445785522461,
+      "learning_rate": 3.750574712643679e-05,
+      "loss": 32.548,
+      "step": 26100
+    },
+    {
+      "epoch": 20.0,
+      "eval_loss": 34.746856689453125,
+      "eval_runtime": 49.2861,
+      "eval_samples_per_second": 26.478,
+      "eval_steps_per_second": 3.328,
+      "step": 26100
     }
   ],
   "logging_steps": 100,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 4
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 2.814621806094336e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null