Training in progress, epoch 31, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +103 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2803b5ea78a77c33b077e0060ee73753f95d174d834b993bc91485092dcde1c9
 size 1227009528

 version https://git-lfs.github.com/spec/v1
+oid sha256:2719c12471af591c878a21526618319ad4ac35dce0e07dad1360e72204de1768
 size 1227009528

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:32e5494189f649e0e13e6f06d32a81e216f34cb7f0ab96e7dcedf6d8cc0cf2ad
 size 2454133690

 version https://git-lfs.github.com/spec/v1
+oid sha256:062cca4f6db12b1f947fb25e682653530fbcd8ea11dd9b4ae6ede9bedeb50d81
 size 2454133690

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:14e90b56db58b69cb5e05f5a06dece018d9fd278779eef8662306f8a599fad84
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d7fa531d7265d7bc31fddb022d4f0400b7f58d98abd17e982bc79b081e31451
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f3041d0db6b547a1562ecb7021cfeb0bcc92669d8d944852d4251894eeac567
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b4fc7201efed2cc22744d4d8152589ca043a54ef0c596de51adaecf88fdd063e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 34.4583740234375,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-36540",
-  "epoch": 30.0,
   "eval_steps": 500,
-  "global_step": 39150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2984,6 +2984,105 @@
       "eval_samples_per_second": 26.403,
       "eval_steps_per_second": 3.318,
       "step": 39150
     }
   ],
   "logging_steps": 100,
@@ -2998,7 +3097,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -3012,7 +3111,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.221932709141504e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 34.4583740234375,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-36540",
+  "epoch": 31.0,
   "eval_steps": 500,
+  "global_step": 40455,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 26.403,
       "eval_steps_per_second": 3.318,
       "step": 39150
+    },
+    {
+      "epoch": 30.038314176245212,
+      "grad_norm": 4.915451526641846,
+      "learning_rate": 3.123467432950192e-05,
+      "loss": 33.0369,
+      "step": 39200
+    },
+    {
+      "epoch": 30.114942528735632,
+      "grad_norm": 4.369636058807373,
+      "learning_rate": 3.1186781609195405e-05,
+      "loss": 33.1459,
+      "step": 39300
+    },
+    {
+      "epoch": 30.191570881226053,
+      "grad_norm": 2.9162957668304443,
+      "learning_rate": 3.113888888888889e-05,
+      "loss": 32.9688,
+      "step": 39400
+    },
+    {
+      "epoch": 30.268199233716476,
+      "grad_norm": 4.7777628898620605,
+      "learning_rate": 3.109099616858238e-05,
+      "loss": 33.7249,
+      "step": 39500
+    },
+    {
+      "epoch": 30.344827586206897,
+      "grad_norm": 3.651850700378418,
+      "learning_rate": 3.104310344827586e-05,
+      "loss": 33.4887,
+      "step": 39600
+    },
+    {
+      "epoch": 30.421455938697317,
+      "grad_norm": 3.29491925239563,
+      "learning_rate": 3.0995210727969346e-05,
+      "loss": 33.5714,
+      "step": 39700
+    },
+    {
+      "epoch": 30.49808429118774,
+      "grad_norm": 3.9116616249084473,
+      "learning_rate": 3.094731800766283e-05,
+      "loss": 33.7763,
+      "step": 39800
+    },
+    {
+      "epoch": 30.57471264367816,
+      "grad_norm": Infinity,
+      "learning_rate": 3.089990421455939e-05,
+      "loss": 32.1907,
+      "step": 39900
+    },
+    {
+      "epoch": 30.65134099616858,
+      "grad_norm": 3.237652063369751,
+      "learning_rate": 3.085201149425287e-05,
+      "loss": 33.344,
+      "step": 40000
+    },
+    {
+      "epoch": 30.727969348659006,
+      "grad_norm": 4.286235809326172,
+      "learning_rate": 3.080459770114943e-05,
+      "loss": 33.181,
+      "step": 40100
+    },
+    {
+      "epoch": 30.804597701149426,
+      "grad_norm": 2.6222527027130127,
+      "learning_rate": 3.075670498084292e-05,
+      "loss": 33.3407,
+      "step": 40200
+    },
+    {
+      "epoch": 30.881226053639846,
+      "grad_norm": 3.7431180477142334,
+      "learning_rate": 3.0708812260536404e-05,
+      "loss": 33.1109,
+      "step": 40300
+    },
+    {
+      "epoch": 30.957854406130267,
+      "grad_norm": 3.0706677436828613,
+      "learning_rate": 3.066091954022989e-05,
+      "loss": 33.3504,
+      "step": 40400
+    },
+    {
+      "epoch": 31.0,
+      "eval_loss": 34.48047637939453,
+      "eval_runtime": 49.4044,
+      "eval_samples_per_second": 26.415,
+      "eval_steps_per_second": 3.32,
+      "step": 40455
     }
   ],
   "logging_steps": 100,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 4.362663799446221e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null