Training in progress, epoch 11, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +103 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b0578188a7562ebb3f653a9c172c7aab35806f8a64c735c6b610d5e2438e16f
 size 1227009528

 version https://git-lfs.github.com/spec/v1
+oid sha256:db4b5d9091a6dbab9d2b4be7cf992134ba4a3e0d729e96284bc4512ac0932620
 size 1227009528

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5bd28c218e06e0ddb714109b224f0d2d6ff0943a81c87b19c895d46869fe043e
 size 2454133690

 version https://git-lfs.github.com/spec/v1
+oid sha256:7d9dbc9961f1b825d07e327826af5885ae6801dfe3867c659b03e9c90764c433
 size 2454133690

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e31bad291fd392e01d851c04b44cf7cac0f5f8b28830534382ca16e10c847e7a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1671761f2a32f97e49b389d83fe64fe54fae391ec682766d59ea01e911801f0d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac3125827e91c83a2b02ffbd5e22748b751677850854e358e914d72d2a70c5e5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:8a8f171c30ec70e8b7de39e28734b3eb14c402c92c5675eccaa14ecf588e6cff
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 34.700294494628906,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-13050",
-  "epoch": 10.0,
   "eval_steps": 500,
-  "global_step": 13050,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -997,6 +997,105 @@
       "eval_samples_per_second": 26.475,
       "eval_steps_per_second": 3.327,
       "step": 13050
     }
   ],
   "logging_steps": 100,
@@ -1011,7 +1110,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -1025,7 +1124,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.407310903047168e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 34.700294494628906,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-13050",
+  "epoch": 11.0,
   "eval_steps": 500,
+  "global_step": 14355,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 26.475,
       "eval_steps_per_second": 3.327,
       "step": 13050
+    },
+    {
+      "epoch": 10.03831417624521,
+      "grad_norm": 2.5322816371917725,
+      "learning_rate": 4.3728448275862074e-05,
+      "loss": 33.8873,
+      "step": 13100
+    },
+    {
+      "epoch": 10.114942528735632,
+      "grad_norm": 2.1063241958618164,
+      "learning_rate": 4.368103448275862e-05,
+      "loss": 33.871,
+      "step": 13200
+    },
+    {
+      "epoch": 10.191570881226054,
+      "grad_norm": 3.7001326084136963,
+      "learning_rate": 4.3633141762452106e-05,
+      "loss": 34.5129,
+      "step": 13300
+    },
+    {
+      "epoch": 10.268199233716475,
+      "grad_norm": 1.8534705638885498,
+      "learning_rate": 4.35852490421456e-05,
+      "loss": 33.7739,
+      "step": 13400
+    },
+    {
+      "epoch": 10.344827586206897,
+      "grad_norm": 1.9871069192886353,
+      "learning_rate": 4.3537356321839086e-05,
+      "loss": 33.4124,
+      "step": 13500
+    },
+    {
+      "epoch": 10.421455938697317,
+      "grad_norm": 2.264529228210449,
+      "learning_rate": 4.348946360153257e-05,
+      "loss": 33.24,
+      "step": 13600
+    },
+    {
+      "epoch": 10.49808429118774,
+      "grad_norm": 3.0297787189483643,
+      "learning_rate": 4.344157088122606e-05,
+      "loss": 33.2922,
+      "step": 13700
+    },
+    {
+      "epoch": 10.574712643678161,
+      "grad_norm": 2.7185864448547363,
+      "learning_rate": 4.339367816091954e-05,
+      "loss": 33.4859,
+      "step": 13800
+    },
+    {
+      "epoch": 10.651340996168582,
+      "grad_norm": 3.8887524604797363,
+      "learning_rate": 4.334578544061303e-05,
+      "loss": 33.4322,
+      "step": 13900
+    },
+    {
+      "epoch": 10.727969348659004,
+      "grad_norm": 2.5119857788085938,
+      "learning_rate": 4.3297892720306514e-05,
+      "loss": 33.6234,
+      "step": 14000
+    },
+    {
+      "epoch": 10.804597701149426,
+      "grad_norm": 3.2969565391540527,
+      "learning_rate": 4.325e-05,
+      "loss": 33.4341,
+      "step": 14100
+    },
+    {
+      "epoch": 10.881226053639846,
+      "grad_norm": 3.3629229068756104,
+      "learning_rate": 4.320210727969349e-05,
+      "loss": 32.7636,
+      "step": 14200
+    },
+    {
+      "epoch": 10.957854406130268,
+      "grad_norm": 3.0765013694763184,
+      "learning_rate": 4.3154214559386975e-05,
+      "loss": 33.7066,
+      "step": 14300
+    },
+    {
+      "epoch": 11.0,
+      "eval_loss": 34.70278549194336,
+      "eval_runtime": 49.2928,
+      "eval_samples_per_second": 26.474,
+      "eval_steps_per_second": 3.327,
+      "step": 14355
     }
   ],
   "logging_steps": 100,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 1.5480419933518848e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null