Training in progress, epoch 9, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +105 -6

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:baf95e715d14feeecbf6a32137141a40e7c6ebb37fd4ced245a4b228e0e760c6
 size 1227009528

 version https://git-lfs.github.com/spec/v1
+oid sha256:43bc4ea709fdacfcad21a864c7ef0120d87fb2ee3ef66e8da659ffa444583421
 size 1227009528

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57865d434d83d8a261c59663a9b74528ff7f76e6caeaff8ebd9b87b92d21935b
 size 2454133690

 version https://git-lfs.github.com/spec/v1
+oid sha256:e223992d46bbe9bb512da1c3a6b7b0881dd041a1208b6f386d718e1eec40e519
 size 2454133690

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eae9ed662108a7f0520e7e0fb77f777896ccf85b0df0ce63cdb5500ab80485e4
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e454a17a07c6bd0c2c4504c81830e5f67d059b3b8b8072bf0bd70406421b40e4
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dce0295becb66b2a3531818b76729e598e30825db8f70c059ee3261485288885
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:8666902435ebb9a3dd0fa595302755da0514dc6e613fbd4ffe9dc07747cb90f1
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 34.818748474121094,
-  "best_model_checkpoint": "/kaggle/working/output/checkpoint-9135",
-  "epoch": 8.0,
   "eval_steps": 500,
-  "global_step": 10440,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -799,6 +799,105 @@
       "eval_samples_per_second": 26.458,
       "eval_steps_per_second": 3.325,
       "step": 10440
     }
   ],
   "logging_steps": 100,
@@ -813,7 +912,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -827,7 +926,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.1258487224377344e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 34.75983428955078,
+  "best_model_checkpoint": "/kaggle/working/output/checkpoint-11745",
+  "epoch": 9.0,
   "eval_steps": 500,
+  "global_step": 11745,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 26.458,
       "eval_steps_per_second": 3.325,
       "step": 10440
+    },
+    {
+      "epoch": 8.045977011494253,
+      "grad_norm": 1.8656938076019287,
+      "learning_rate": 4.497318007662836e-05,
+      "loss": 33.8404,
+      "step": 10500
+    },
+    {
+      "epoch": 8.122605363984674,
+      "grad_norm": 2.783926486968994,
+      "learning_rate": 4.4925287356321844e-05,
+      "loss": 33.9544,
+      "step": 10600
+    },
+    {
+      "epoch": 8.199233716475096,
+      "grad_norm": 2.175081968307495,
+      "learning_rate": 4.487739463601533e-05,
+      "loss": 33.6405,
+      "step": 10700
+    },
+    {
+      "epoch": 8.275862068965518,
+      "grad_norm": 4.121524333953857,
+      "learning_rate": 4.482950191570882e-05,
+      "loss": 33.568,
+      "step": 10800
+    },
+    {
+      "epoch": 8.352490421455938,
+      "grad_norm": 3.978410482406616,
+      "learning_rate": 4.4781609195402305e-05,
+      "loss": 33.6659,
+      "step": 10900
+    },
+    {
+      "epoch": 8.42911877394636,
+      "grad_norm": 3.0454840660095215,
+      "learning_rate": 4.473419540229885e-05,
+      "loss": 33.2689,
+      "step": 11000
+    },
+    {
+      "epoch": 8.505747126436782,
+      "grad_norm": 3.169114828109741,
+      "learning_rate": 4.4686302681992336e-05,
+      "loss": 33.6227,
+      "step": 11100
+    },
+    {
+      "epoch": 8.582375478927203,
+      "grad_norm": 2.5880959033966064,
+      "learning_rate": 4.463840996168582e-05,
+      "loss": 33.3022,
+      "step": 11200
+    },
+    {
+      "epoch": 8.659003831417625,
+      "grad_norm": 2.1367762088775635,
+      "learning_rate": 4.459051724137932e-05,
+      "loss": 33.2851,
+      "step": 11300
+    },
+    {
+      "epoch": 8.735632183908045,
+      "grad_norm": 3.0278782844543457,
+      "learning_rate": 4.4542624521072804e-05,
+      "loss": 33.922,
+      "step": 11400
+    },
+    {
+      "epoch": 8.812260536398467,
+      "grad_norm": 2.6361653804779053,
+      "learning_rate": 4.4494731800766284e-05,
+      "loss": 33.1482,
+      "step": 11500
+    },
+    {
+      "epoch": 8.88888888888889,
+      "grad_norm": 2.7836809158325195,
+      "learning_rate": 4.444683908045977e-05,
+      "loss": 34.1345,
+      "step": 11600
+    },
+    {
+      "epoch": 8.96551724137931,
+      "grad_norm": 2.519681453704834,
+      "learning_rate": 4.439894636015326e-05,
+      "loss": 34.0642,
+      "step": 11700
+    },
+    {
+      "epoch": 9.0,
+      "eval_loss": 34.75983428955078,
+      "eval_runtime": 49.3463,
+      "eval_samples_per_second": 26.446,
+      "eval_steps_per_second": 3.323,
+      "step": 11745
     }
   ],
   "logging_steps": 100,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 1.2665798127424512e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null