Training in progress, epoch 32, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +103 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2719c12471af591c878a21526618319ad4ac35dce0e07dad1360e72204de1768
 size 1227009528

 version https://git-lfs.github.com/spec/v1
+oid sha256:c42a1f9b7d4161ba1bae9d13deadbeb763fe153dec6526f8caefd629b413acef
 size 1227009528

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:062cca4f6db12b1f947fb25e682653530fbcd8ea11dd9b4ae6ede9bedeb50d81
 size 2454133690

 version https://git-lfs.github.com/spec/v1
+oid sha256:192e681f7b02b75d8744a13995bf0b21ddfec28797c5b2117fc39e50a373e7e3
 size 2454133690

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d7fa531d7265d7bc31fddb022d4f0400b7f58d98abd17e982bc79b081e31451
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f2c4db18d5a66e89d462b11782d22327d30dcce66816316fdc2ddae7a53a0ffe
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b4fc7201efed2cc22744d4d8152589ca043a54ef0c596de51adaecf88fdd063e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ccb7fdd86b6cca8b702924e4a75b448b78185ab1cd7362e327e12720bd73d09b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 34.4583740234375,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-36540",
-  "epoch": 31.0,
   "eval_steps": 500,
-  "global_step": 40455,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3083,6 +3083,105 @@
       "eval_samples_per_second": 26.415,
       "eval_steps_per_second": 3.32,
       "step": 40455
     }
   ],
   "logging_steps": 100,
@@ -3097,7 +3196,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
@@ -3111,7 +3210,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.362663799446221e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 34.4583740234375,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-36540",
+  "epoch": 32.0,
   "eval_steps": 500,
+  "global_step": 41760,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 26.415,
       "eval_steps_per_second": 3.32,
       "step": 40455
+    },
+    {
+      "epoch": 31.03448275862069,
+      "grad_norm": 3.288548231124878,
+      "learning_rate": 3.061302681992337e-05,
+      "loss": 33.4014,
+      "step": 40500
+    },
+    {
+      "epoch": 31.11111111111111,
+      "grad_norm": 4.078604221343994,
+      "learning_rate": 3.056513409961686e-05,
+      "loss": 33.5796,
+      "step": 40600
+    },
+    {
+      "epoch": 31.18773946360153,
+      "grad_norm": 3.589484691619873,
+      "learning_rate": 3.0517241379310348e-05,
+      "loss": 32.9547,
+      "step": 40700
+    },
+    {
+      "epoch": 31.264367816091955,
+      "grad_norm": 3.1043126583099365,
+      "learning_rate": 3.046934865900383e-05,
+      "loss": 33.2105,
+      "step": 40800
+    },
+    {
+      "epoch": 31.340996168582375,
+      "grad_norm": 2.446356773376465,
+      "learning_rate": 3.0421455938697318e-05,
+      "loss": 33.1642,
+      "step": 40900
+    },
+    {
+      "epoch": 31.417624521072796,
+      "grad_norm": 2.966627597808838,
+      "learning_rate": 3.0373563218390805e-05,
+      "loss": 32.7751,
+      "step": 41000
+    },
+    {
+      "epoch": 31.49425287356322,
+      "grad_norm": 4.547020435333252,
+      "learning_rate": 3.0325670498084292e-05,
+      "loss": 33.8578,
+      "step": 41100
+    },
+    {
+      "epoch": 31.57088122605364,
+      "grad_norm": 3.151139259338379,
+      "learning_rate": 3.0277777777777776e-05,
+      "loss": 33.2976,
+      "step": 41200
+    },
+    {
+      "epoch": 31.64750957854406,
+      "grad_norm": 2.8900582790374756,
+      "learning_rate": 3.0229885057471262e-05,
+      "loss": 33.1161,
+      "step": 41300
+    },
+    {
+      "epoch": 31.724137931034484,
+      "grad_norm": 2.5485446453094482,
+      "learning_rate": 3.0181992337164756e-05,
+      "loss": 33.596,
+      "step": 41400
+    },
+    {
+      "epoch": 31.800766283524904,
+      "grad_norm": 2.5474777221679688,
+      "learning_rate": 3.0134099616858243e-05,
+      "loss": 33.3569,
+      "step": 41500
+    },
+    {
+      "epoch": 31.877394636015325,
+      "grad_norm": 3.6182713508605957,
+      "learning_rate": 3.0086206896551726e-05,
+      "loss": 32.824,
+      "step": 41600
+    },
+    {
+      "epoch": 31.95402298850575,
+      "grad_norm": 3.898332118988037,
+      "learning_rate": 3.0038314176245213e-05,
+      "loss": 32.8775,
+      "step": 41700
+    },
+    {
+      "epoch": 32.0,
+      "eval_loss": 34.500526428222656,
+      "eval_runtime": 49.4041,
+      "eval_samples_per_second": 26.415,
+      "eval_steps_per_second": 3.32,
+      "step": 41760
     }
   ],
   "logging_steps": 100,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 4
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 4.503394889750938e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null