Training in progress, epoch 33, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +105 -6

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c42a1f9b7d4161ba1bae9d13deadbeb763fe153dec6526f8caefd629b413acef
 size 1227009528

 version https://git-lfs.github.com/spec/v1
+oid sha256:2159f89156ff4dcd1758b09775f13ff807bf88e997041dac2f177d65d0f6cad3
 size 1227009528

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:192e681f7b02b75d8744a13995bf0b21ddfec28797c5b2117fc39e50a373e7e3
 size 2454133690

 version https://git-lfs.github.com/spec/v1
+oid sha256:7f5e4fa380d2fa2203229213d42b423cf11ccfb6cff79739b5f639ce444c7cee
 size 2454133690

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f2c4db18d5a66e89d462b11782d22327d30dcce66816316fdc2ddae7a53a0ffe
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:32a902a960b0fe553ebdc19d7e0271cba80d3ee51b2c90eb8cc26761d030d21d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ccb7fdd86b6cca8b702924e4a75b448b78185ab1cd7362e327e12720bd73d09b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4666922908bb339b0a4b434cca7a5683a807e200bb56b9f1cacc195313b5081d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 34.4583740234375,
-  "best_model_checkpoint": "/kaggle/working/output/checkpoint-36540",
-  "epoch": 32.0,
   "eval_steps": 500,
-  "global_step": 41760,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3182,6 +3182,105 @@
       "eval_samples_per_second": 26.415,
       "eval_steps_per_second": 3.32,
       "step": 41760
     }
   ],
   "logging_steps": 100,
@@ -3196,7 +3295,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 4
       }
     },
     "TrainerControl": {
@@ -3210,7 +3309,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.503394889750938e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 34.45762252807617,
+  "best_model_checkpoint": "/kaggle/working/output/checkpoint-43065",
+  "epoch": 33.0,
   "eval_steps": 500,
+  "global_step": 43065,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 26.415,
       "eval_steps_per_second": 3.32,
       "step": 41760
+    },
+    {
+      "epoch": 32.030651340996165,
+      "grad_norm": 3.481757164001465,
+      "learning_rate": 2.99904214559387e-05,
+      "loss": 33.4618,
+      "step": 41800
+    },
+    {
+      "epoch": 32.10727969348659,
+      "grad_norm": 3.9191551208496094,
+      "learning_rate": 2.9942528735632187e-05,
+      "loss": 33.627,
+      "step": 41900
+    },
+    {
+      "epoch": 32.18390804597701,
+      "grad_norm": 5.722991466522217,
+      "learning_rate": 2.989463601532567e-05,
+      "loss": 32.705,
+      "step": 42000
+    },
+    {
+      "epoch": 32.26053639846743,
+      "grad_norm": 4.626276016235352,
+      "learning_rate": 2.9846743295019157e-05,
+      "loss": 33.4211,
+      "step": 42100
+    },
+    {
+      "epoch": 32.337164750957854,
+      "grad_norm": 2.526745557785034,
+      "learning_rate": 2.9798850574712644e-05,
+      "loss": 32.9605,
+      "step": 42200
+    },
+    {
+      "epoch": 32.41379310344828,
+      "grad_norm": 2.2517364025115967,
+      "learning_rate": 2.975095785440613e-05,
+      "loss": 33.1264,
+      "step": 42300
+    },
+    {
+      "epoch": 32.490421455938694,
+      "grad_norm": 5.5678606033325195,
+      "learning_rate": 2.9703065134099618e-05,
+      "loss": 33.1141,
+      "step": 42400
+    },
+    {
+      "epoch": 32.56704980842912,
+      "grad_norm": 3.7891595363616943,
+      "learning_rate": 2.96551724137931e-05,
+      "loss": 33.3294,
+      "step": 42500
+    },
+    {
+      "epoch": 32.64367816091954,
+      "grad_norm": 3.350956916809082,
+      "learning_rate": 2.960727969348659e-05,
+      "loss": 33.6182,
+      "step": 42600
+    },
+    {
+      "epoch": 32.72030651340996,
+      "grad_norm": 3.138821601867676,
+      "learning_rate": 2.9559386973180075e-05,
+      "loss": 33.2219,
+      "step": 42700
+    },
+    {
+      "epoch": 32.79693486590038,
+      "grad_norm": 3.301961898803711,
+      "learning_rate": 2.9511494252873566e-05,
+      "loss": 33.5015,
+      "step": 42800
+    },
+    {
+      "epoch": 32.87356321839081,
+      "grad_norm": 3.0760138034820557,
+      "learning_rate": 2.9463601532567052e-05,
+      "loss": 33.3376,
+      "step": 42900
+    },
+    {
+      "epoch": 32.95019157088122,
+      "grad_norm": 2.474372625350952,
+      "learning_rate": 2.941570881226054e-05,
+      "loss": 32.9016,
+      "step": 43000
+    },
+    {
+      "epoch": 33.0,
+      "eval_loss": 34.45762252807617,
+      "eval_runtime": 49.4143,
+      "eval_samples_per_second": 26.409,
+      "eval_steps_per_second": 3.319,
+      "step": 43065
     }
   ],
   "logging_steps": 100,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 4.644125980055654e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null