Training in progress, epoch 12, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +103 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db4b5d9091a6dbab9d2b4be7cf992134ba4a3e0d729e96284bc4512ac0932620
 size 1227009528

 version https://git-lfs.github.com/spec/v1
+oid sha256:d087532161fc3d3113f958d4327ca8ab76fb93d1b9005d7b72d8341648a7f95e
 size 1227009528

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d9dbc9961f1b825d07e327826af5885ae6801dfe3867c659b03e9c90764c433
 size 2454133690

 version https://git-lfs.github.com/spec/v1
+oid sha256:d359385b3376fb641197873abbd6f199bc67d84ad37382d398095c1f51b664a9
 size 2454133690

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1671761f2a32f97e49b389d83fe64fe54fae391ec682766d59ea01e911801f0d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3c4d71a933e8a99a1b5e03ca178837d4af39c5cb9255b1959f57ce6925e566d0
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a8f171c30ec70e8b7de39e28734b3eb14c402c92c5675eccaa14ecf588e6cff
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0f18daae1b94bcadba9e921cdd5d160fa2fe3e4c34c14e032eed270d5a8a3cca
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 34.700294494628906,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-13050",
-  "epoch": 11.0,
   "eval_steps": 500,
-  "global_step": 14355,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1096,6 +1096,105 @@
       "eval_samples_per_second": 26.474,
       "eval_steps_per_second": 3.327,
       "step": 14355
     }
   ],
   "logging_steps": 100,
@@ -1110,7 +1209,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -1124,7 +1223,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.5480419933518848e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 34.700294494628906,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-13050",
+  "epoch": 12.0,
   "eval_steps": 500,
+  "global_step": 15660,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 26.474,
       "eval_steps_per_second": 3.327,
       "step": 14355
+    },
+    {
+      "epoch": 11.03448275862069,
+      "grad_norm": 2.7724273204803467,
+      "learning_rate": 4.310632183908046e-05,
+      "loss": 33.7759,
+      "step": 14400
+    },
+    {
+      "epoch": 11.11111111111111,
+      "grad_norm": 3.9663071632385254,
+      "learning_rate": 4.305842911877395e-05,
+      "loss": 33.6063,
+      "step": 14500
+    },
+    {
+      "epoch": 11.187739463601533,
+      "grad_norm": 2.53495717048645,
+      "learning_rate": 4.3010536398467435e-05,
+      "loss": 32.9251,
+      "step": 14600
+    },
+    {
+      "epoch": 11.264367816091955,
+      "grad_norm": 3.928633689880371,
+      "learning_rate": 4.296264367816092e-05,
+      "loss": 33.41,
+      "step": 14700
+    },
+    {
+      "epoch": 11.340996168582375,
+      "grad_norm": 1.888804316520691,
+      "learning_rate": 4.291475095785441e-05,
+      "loss": 33.147,
+      "step": 14800
+    },
+    {
+      "epoch": 11.417624521072797,
+      "grad_norm": 3.151488780975342,
+      "learning_rate": 4.2866858237547896e-05,
+      "loss": 34.011,
+      "step": 14900
+    },
+    {
+      "epoch": 11.494252873563218,
+      "grad_norm": 2.659867286682129,
+      "learning_rate": 4.281896551724138e-05,
+      "loss": 33.3559,
+      "step": 15000
+    },
+    {
+      "epoch": 11.57088122605364,
+      "grad_norm": 4.092405319213867,
+      "learning_rate": 4.277107279693487e-05,
+      "loss": 33.2301,
+      "step": 15100
+    },
+    {
+      "epoch": 11.647509578544062,
+      "grad_norm": 4.295740127563477,
+      "learning_rate": 4.2723659003831415e-05,
+      "loss": 33.1047,
+      "step": 15200
+    },
+    {
+      "epoch": 11.724137931034482,
+      "grad_norm": 2.4472806453704834,
+      "learning_rate": 4.26757662835249e-05,
+      "loss": 33.8206,
+      "step": 15300
+    },
+    {
+      "epoch": 11.800766283524904,
+      "grad_norm": 2.716550350189209,
+      "learning_rate": 4.262787356321839e-05,
+      "loss": 33.7173,
+      "step": 15400
+    },
+    {
+      "epoch": 11.877394636015326,
+      "grad_norm": 3.1278491020202637,
+      "learning_rate": 4.257998084291188e-05,
+      "loss": 34.0344,
+      "step": 15500
+    },
+    {
+      "epoch": 11.954022988505747,
+      "grad_norm": 2.4835212230682373,
+      "learning_rate": 4.253208812260537e-05,
+      "loss": 33.8397,
+      "step": 15600
+    },
+    {
+      "epoch": 12.0,
+      "eval_loss": 34.70100402832031,
+      "eval_runtime": 49.2554,
+      "eval_samples_per_second": 26.495,
+      "eval_steps_per_second": 3.33,
+      "step": 15660
     }
   ],
   "logging_steps": 100,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 1.6887730836566016e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null