Training in progress, epoch 30, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +103 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:daef4e55ad3f9cac7476db8fc6c0558852fa9a93b615790669d2f15c7a8efc1b
 size 1227009528

 version https://git-lfs.github.com/spec/v1
+oid sha256:2803b5ea78a77c33b077e0060ee73753f95d174d834b993bc91485092dcde1c9
 size 1227009528

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4d1575be4a450eb389f45ccce71c57c7f233f5a2b6bd3384717eae5a12f344b
 size 2454133690

 version https://git-lfs.github.com/spec/v1
+oid sha256:32e5494189f649e0e13e6f06d32a81e216f34cb7f0ab96e7dcedf6d8cc0cf2ad
 size 2454133690

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dd4ea3a90506cd06614bd07f48e2ae5e6d0bcae66d2c1234c0b2b3e5e6f04045
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:14e90b56db58b69cb5e05f5a06dece018d9fd278779eef8662306f8a599fad84
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ca503ac3f6a648f00e869d6e43b6905f33cfb3af705d6edf930285c1fae1c03
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:2f3041d0db6b547a1562ecb7021cfeb0bcc92669d8d944852d4251894eeac567
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 34.4583740234375,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-36540",
-  "epoch": 29.0,
   "eval_steps": 500,
-  "global_step": 37845,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2885,6 +2885,105 @@
       "eval_samples_per_second": 26.444,
       "eval_steps_per_second": 3.323,
       "step": 37845
     }
   ],
   "logging_steps": 100,
@@ -2899,7 +2998,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -2913,7 +3012,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.081201618836787e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 34.4583740234375,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-36540",
+  "epoch": 30.0,
   "eval_steps": 500,
+  "global_step": 39150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 26.444,
       "eval_steps_per_second": 3.323,
       "step": 37845
+    },
+    {
+      "epoch": 29.042145593869733,
+      "grad_norm": 4.515305042266846,
+      "learning_rate": 3.185727969348659e-05,
+      "loss": 33.5753,
+      "step": 37900
+    },
+    {
+      "epoch": 29.118773946360154,
+      "grad_norm": 3.7501096725463867,
+      "learning_rate": 3.1809386973180076e-05,
+      "loss": 33.3131,
+      "step": 38000
+    },
+    {
+      "epoch": 29.195402298850574,
+      "grad_norm": 3.431818723678589,
+      "learning_rate": 3.176149425287356e-05,
+      "loss": 33.085,
+      "step": 38100
+    },
+    {
+      "epoch": 29.272030651340994,
+      "grad_norm": 3.4503543376922607,
+      "learning_rate": 3.171360153256705e-05,
+      "loss": 32.7894,
+      "step": 38200
+    },
+    {
+      "epoch": 29.34865900383142,
+      "grad_norm": 4.361378192901611,
+      "learning_rate": 3.1665708812260536e-05,
+      "loss": 33.4922,
+      "step": 38300
+    },
+    {
+      "epoch": 29.42528735632184,
+      "grad_norm": 2.354480504989624,
+      "learning_rate": 3.161781609195402e-05,
+      "loss": 33.3214,
+      "step": 38400
+    },
+    {
+      "epoch": 29.50191570881226,
+      "grad_norm": 3.3123044967651367,
+      "learning_rate": 3.156992337164751e-05,
+      "loss": 33.3181,
+      "step": 38500
+    },
+    {
+      "epoch": 29.578544061302683,
+      "grad_norm": 2.3824117183685303,
+      "learning_rate": 3.1522030651341e-05,
+      "loss": 33.0926,
+      "step": 38600
+    },
+    {
+      "epoch": 29.655172413793103,
+      "grad_norm": 2.811178684234619,
+      "learning_rate": 3.1474137931034484e-05,
+      "loss": 33.3361,
+      "step": 38700
+    },
+    {
+      "epoch": 29.731800766283524,
+      "grad_norm": 4.715090751647949,
+      "learning_rate": 3.142624521072797e-05,
+      "loss": 32.8444,
+      "step": 38800
+    },
+    {
+      "epoch": 29.808429118773947,
+      "grad_norm": 2.191209316253662,
+      "learning_rate": 3.137835249042146e-05,
+      "loss": 33.9677,
+      "step": 38900
+    },
+    {
+      "epoch": 29.885057471264368,
+      "grad_norm": 2.606814384460449,
+      "learning_rate": 3.1330459770114945e-05,
+      "loss": 33.3536,
+      "step": 39000
+    },
+    {
+      "epoch": 29.961685823754788,
+      "grad_norm": 4.8533172607421875,
+      "learning_rate": 3.128256704980843e-05,
+      "loss": 33.2721,
+      "step": 39100
+    },
+    {
+      "epoch": 30.0,
+      "eval_loss": 34.46094512939453,
+      "eval_runtime": 49.4265,
+      "eval_samples_per_second": 26.403,
+      "eval_steps_per_second": 3.318,
+      "step": 39150
     }
   ],
   "logging_steps": 100,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 4.221932709141504e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null