Training in progress, epoch 14, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +103 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:661988750c1636a8230782954534697c2c428758d1ffff311560504559c94f4d
 size 1227009528

 version https://git-lfs.github.com/spec/v1
+oid sha256:d3dc2253f6a705d4581a259cf35a73237854fffb23cfd63c819297d468b440f4
 size 1227009528

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:78153e1deab7d57bbdde6b48f09655dcca7643e4183944f8a976272944fcf355
 size 2454133690

 version https://git-lfs.github.com/spec/v1
+oid sha256:834b487b2e27a2a0a519304d2774ed6f8a3bc0bf00c5ec3f97eb6a666a2f12df
 size 2454133690

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:286894daea5f57f8b75686aeb16b9768a256561b0f4aac865dac3818faf004c7
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b337fd52d1f50e0cc3a5fd860e091b39657ac975bbe77a94898d8354c966157f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d4a19fb42510963d95f267fb826c867f003e2775662c12b848554d94e9087c80
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:099df6d11185a7212be93e879fb321f1e41d7aa7efedf5f52a9ceeb4b4dffb4b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 34.700294494628906,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-13050",
-  "epoch": 13.0,
   "eval_steps": 500,
-  "global_step": 16965,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1294,6 +1294,105 @@
       "eval_samples_per_second": 26.495,
       "eval_steps_per_second": 3.33,
       "step": 16965
     }
   ],
   "logging_steps": 100,
@@ -1308,7 +1407,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
@@ -1322,7 +1421,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.8295041739613184e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 34.700294494628906,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-13050",
+  "epoch": 14.0,
   "eval_steps": 500,
+  "global_step": 18270,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 26.495,
       "eval_steps_per_second": 3.33,
       "step": 16965
+    },
+    {
+      "epoch": 13.026819923371647,
+      "grad_norm": 3.8546385765075684,
+      "learning_rate": 4.186159003831418e-05,
+      "loss": 33.1455,
+      "step": 17000
+    },
+    {
+      "epoch": 13.10344827586207,
+      "grad_norm": 3.751404047012329,
+      "learning_rate": 4.1813697318007665e-05,
+      "loss": 33.7843,
+      "step": 17100
+    },
+    {
+      "epoch": 13.18007662835249,
+      "grad_norm": 3.0844898223876953,
+      "learning_rate": 4.176580459770115e-05,
+      "loss": 32.8163,
+      "step": 17200
+    },
+    {
+      "epoch": 13.256704980842912,
+      "grad_norm": 1.7570416927337646,
+      "learning_rate": 4.1718390804597704e-05,
+      "loss": 33.3296,
+      "step": 17300
+    },
+    {
+      "epoch": 13.333333333333334,
+      "grad_norm": 2.5809695720672607,
+      "learning_rate": 4.1670498084291184e-05,
+      "loss": 34.1621,
+      "step": 17400
+    },
+    {
+      "epoch": 13.409961685823754,
+      "grad_norm": 2.564545154571533,
+      "learning_rate": 4.162260536398467e-05,
+      "loss": 33.4641,
+      "step": 17500
+    },
+    {
+      "epoch": 13.486590038314176,
+      "grad_norm": 3.2340521812438965,
+      "learning_rate": 4.1574712643678165e-05,
+      "loss": 33.5958,
+      "step": 17600
+    },
+    {
+      "epoch": 13.563218390804598,
+      "grad_norm": 4.329983711242676,
+      "learning_rate": 4.152681992337165e-05,
+      "loss": 33.53,
+      "step": 17700
+    },
+    {
+      "epoch": 13.639846743295019,
+      "grad_norm": 2.3342621326446533,
+      "learning_rate": 4.147892720306514e-05,
+      "loss": 33.7702,
+      "step": 17800
+    },
+    {
+      "epoch": 13.71647509578544,
+      "grad_norm": 2.6764466762542725,
+      "learning_rate": 4.1431034482758625e-05,
+      "loss": 33.6024,
+      "step": 17900
+    },
+    {
+      "epoch": 13.793103448275861,
+      "grad_norm": 5.089807033538818,
+      "learning_rate": 4.138314176245211e-05,
+      "loss": 32.9291,
+      "step": 18000
+    },
+    {
+      "epoch": 13.869731800766283,
+      "grad_norm": 2.4803364276885986,
+      "learning_rate": 4.13352490421456e-05,
+      "loss": 33.2098,
+      "step": 18100
+    },
+    {
+      "epoch": 13.946360153256705,
+      "grad_norm": 3.0112080574035645,
+      "learning_rate": 4.128735632183908e-05,
+      "loss": 33.7988,
+      "step": 18200
+    },
+    {
+      "epoch": 14.0,
+      "eval_loss": 34.82696533203125,
+      "eval_runtime": 49.261,
+      "eval_samples_per_second": 26.492,
+      "eval_steps_per_second": 3.329,
+      "step": 18270
     }
   ],
   "logging_steps": 100,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 4
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 1.970235264266035e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null