Training in progress, epoch 13, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +103 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d087532161fc3d3113f958d4327ca8ab76fb93d1b9005d7b72d8341648a7f95e
 size 1227009528

 version https://git-lfs.github.com/spec/v1
+oid sha256:661988750c1636a8230782954534697c2c428758d1ffff311560504559c94f4d
 size 1227009528

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d359385b3376fb641197873abbd6f199bc67d84ad37382d398095c1f51b664a9
 size 2454133690

 version https://git-lfs.github.com/spec/v1
+oid sha256:78153e1deab7d57bbdde6b48f09655dcca7643e4183944f8a976272944fcf355
 size 2454133690

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c4d71a933e8a99a1b5e03ca178837d4af39c5cb9255b1959f57ce6925e566d0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:286894daea5f57f8b75686aeb16b9768a256561b0f4aac865dac3818faf004c7
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0f18daae1b94bcadba9e921cdd5d160fa2fe3e4c34c14e032eed270d5a8a3cca
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d4a19fb42510963d95f267fb826c867f003e2775662c12b848554d94e9087c80
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 34.700294494628906,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-13050",
-  "epoch": 12.0,
   "eval_steps": 500,
-  "global_step": 15660,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1195,6 +1195,105 @@
       "eval_samples_per_second": 26.495,
       "eval_steps_per_second": 3.33,
       "step": 15660
     }
   ],
   "logging_steps": 100,
@@ -1209,7 +1308,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -1223,7 +1322,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.6887730836566016e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 34.700294494628906,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-13050",
+  "epoch": 13.0,
   "eval_steps": 500,
+  "global_step": 16965,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 26.495,
       "eval_steps_per_second": 3.33,
       "step": 15660
+    },
+    {
+      "epoch": 12.030651340996169,
+      "grad_norm": 2.331453800201416,
+      "learning_rate": 4.2484195402298856e-05,
+      "loss": 32.9794,
+      "step": 15700
+    },
+    {
+      "epoch": 12.10727969348659,
+      "grad_norm": 2.2127463817596436,
+      "learning_rate": 4.243630268199234e-05,
+      "loss": 33.6367,
+      "step": 15800
+    },
+    {
+      "epoch": 12.183908045977011,
+      "grad_norm": 3.1127703189849854,
+      "learning_rate": 4.238840996168583e-05,
+      "loss": 32.7221,
+      "step": 15900
+    },
+    {
+      "epoch": 12.260536398467433,
+      "grad_norm": 2.5665576457977295,
+      "learning_rate": 4.234051724137931e-05,
+      "loss": 33.7796,
+      "step": 16000
+    },
+    {
+      "epoch": 12.337164750957854,
+      "grad_norm": 2.995265245437622,
+      "learning_rate": 4.22926245210728e-05,
+      "loss": 32.8062,
+      "step": 16100
+    },
+    {
+      "epoch": 12.413793103448276,
+      "grad_norm": 3.4698216915130615,
+      "learning_rate": 4.2244731800766284e-05,
+      "loss": 33.5182,
+      "step": 16200
+    },
+    {
+      "epoch": 12.490421455938698,
+      "grad_norm": 4.030599117279053,
+      "learning_rate": 4.219683908045977e-05,
+      "loss": 33.7621,
+      "step": 16300
+    },
+    {
+      "epoch": 12.567049808429118,
+      "grad_norm": 2.277189254760742,
+      "learning_rate": 4.214894636015326e-05,
+      "loss": 33.7926,
+      "step": 16400
+    },
+    {
+      "epoch": 12.64367816091954,
+      "grad_norm": 2.3156633377075195,
+      "learning_rate": 4.2101053639846744e-05,
+      "loss": 33.869,
+      "step": 16500
+    },
+    {
+      "epoch": 12.720306513409962,
+      "grad_norm": 3.5089361667633057,
+      "learning_rate": 4.205316091954023e-05,
+      "loss": 33.6732,
+      "step": 16600
+    },
+    {
+      "epoch": 12.796934865900383,
+      "grad_norm": 2.5379600524902344,
+      "learning_rate": 4.200526819923372e-05,
+      "loss": 33.5854,
+      "step": 16700
+    },
+    {
+      "epoch": 12.873563218390805,
+      "grad_norm": 2.5784411430358887,
+      "learning_rate": 4.1957375478927205e-05,
+      "loss": 33.2835,
+      "step": 16800
+    },
+    {
+      "epoch": 12.950191570881227,
+      "grad_norm": 2.574859380722046,
+      "learning_rate": 4.190948275862069e-05,
+      "loss": 33.8945,
+      "step": 16900
+    },
+    {
+      "epoch": 13.0,
+      "eval_loss": 34.72227478027344,
+      "eval_runtime": 49.2549,
+      "eval_samples_per_second": 26.495,
+      "eval_steps_per_second": 3.33,
+      "step": 16965
     }
   ],
   "logging_steps": 100,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 1.8295041739613184e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null