Training in progress, epoch 17, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +103 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e1ea03da8d9a978320d45f1bc6677407a85624af3d9baa7bae32e5c03676367
 size 1227009528

 version https://git-lfs.github.com/spec/v1
+oid sha256:48c4ff7c723a1e608d989b96e44d892070778ec83c952e55c57dddf3c3f48178
 size 1227009528

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b98a2483ec61025369cf6eb8fec5397cf636bfb0ffa7a3eedf987ef5b4b9d5c8
 size 2454133690

 version https://git-lfs.github.com/spec/v1
+oid sha256:4d0a831ac586fb1be369d61d6f0cc41522b60258bcdd25b76e42085747f41185
 size 2454133690

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ffc97010f20f826b75fdc09ec365ad76a45dfcdc64194b72334d5902c2cf28eb
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:46332136f176afc146e73eb2e78a93d7beb2c41f4d0c62f6c39855d4ba7b1979
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c5384c34df266d1db083f57452aa67b48a3012f0aeee7f4ad7194984e89d75fe
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:855574b974c7e1ac9f8ed715f000f5b33d3a42b4a2102f47eec78477a0831f87
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 34.54485321044922,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-20880",
-  "epoch": 16.0,
   "eval_steps": 500,
-  "global_step": 20880,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1591,6 +1591,105 @@
       "eval_samples_per_second": 26.46,
       "eval_steps_per_second": 3.325,
       "step": 20880
     }
   ],
   "logging_steps": 100,
@@ -1605,7 +1704,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -1619,7 +1718,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.251697444875469e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 34.54485321044922,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-20880",
+  "epoch": 17.0,
   "eval_steps": 500,
+  "global_step": 22185,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 26.46,
       "eval_steps_per_second": 3.325,
       "step": 20880
+    },
+    {
+      "epoch": 16.015325670498083,
+      "grad_norm": 2.970867156982422,
+      "learning_rate": 3.999473180076628e-05,
+      "loss": 33.5118,
+      "step": 20900
+    },
+    {
+      "epoch": 16.091954022988507,
+      "grad_norm": 2.395005464553833,
+      "learning_rate": 3.994683908045978e-05,
+      "loss": 34.1932,
+      "step": 21000
+    },
+    {
+      "epoch": 16.168582375478927,
+      "grad_norm": 2.8175065517425537,
+      "learning_rate": 3.9898946360153264e-05,
+      "loss": 32.9815,
+      "step": 21100
+    },
+    {
+      "epoch": 16.245210727969347,
+      "grad_norm": 4.665389537811279,
+      "learning_rate": 3.985105363984675e-05,
+      "loss": 33.8616,
+      "step": 21200
+    },
+    {
+      "epoch": 16.32183908045977,
+      "grad_norm": 3.425340175628662,
+      "learning_rate": 3.980316091954023e-05,
+      "loss": 33.2022,
+      "step": 21300
+    },
+    {
+      "epoch": 16.39846743295019,
+      "grad_norm": 5.212127685546875,
+      "learning_rate": 3.975574712643678e-05,
+      "loss": 33.3935,
+      "step": 21400
+    },
+    {
+      "epoch": 16.47509578544061,
+      "grad_norm": 1.9034606218338013,
+      "learning_rate": 3.970785440613027e-05,
+      "loss": 32.739,
+      "step": 21500
+    },
+    {
+      "epoch": 16.551724137931036,
+      "grad_norm": 2.024109125137329,
+      "learning_rate": 3.9659961685823756e-05,
+      "loss": 33.4628,
+      "step": 21600
+    },
+    {
+      "epoch": 16.628352490421456,
+      "grad_norm": 2.8185606002807617,
+      "learning_rate": 3.961206896551724e-05,
+      "loss": 33.7672,
+      "step": 21700
+    },
+    {
+      "epoch": 16.704980842911876,
+      "grad_norm": 3.2981534004211426,
+      "learning_rate": 3.956417624521073e-05,
+      "loss": 33.1976,
+      "step": 21800
+    },
+    {
+      "epoch": 16.7816091954023,
+      "grad_norm": 4.531330585479736,
+      "learning_rate": 3.951628352490422e-05,
+      "loss": 33.2379,
+      "step": 21900
+    },
+    {
+      "epoch": 16.85823754789272,
+      "grad_norm": 2.4455623626708984,
+      "learning_rate": 3.9468390804597704e-05,
+      "loss": 33.2898,
+      "step": 22000
+    },
+    {
+      "epoch": 16.93486590038314,
+      "grad_norm": 4.1596245765686035,
+      "learning_rate": 3.942049808429119e-05,
+      "loss": 33.2167,
+      "step": 22100
+    },
+    {
+      "epoch": 17.0,
+      "eval_loss": 34.65380096435547,
+      "eval_runtime": 49.3114,
+      "eval_samples_per_second": 26.464,
+      "eval_steps_per_second": 3.326,
+      "step": 22185
     }
   ],
   "logging_steps": 100,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 2.3924285351801856e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null