Training in progress, epoch 8, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +103 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ed8ae4ab42781745b2126513149381b5b6ff7214663e4449b01b2359d8311e3a
 size 1227009528

 version https://git-lfs.github.com/spec/v1
+oid sha256:baf95e715d14feeecbf6a32137141a40e7c6ebb37fd4ced245a4b228e0e760c6
 size 1227009528

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac9434c970128034aa8822b8a0401c794db85187656338f45cc845953009b5b3
 size 2454133690

 version https://git-lfs.github.com/spec/v1
+oid sha256:57865d434d83d8a261c59663a9b74528ff7f76e6caeaff8ebd9b87b92d21935b
 size 2454133690

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f7565598da56842799ee28845bb7d5540de84da2eb38da30890faa373e17c3ad
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:eae9ed662108a7f0520e7e0fb77f777896ccf85b0df0ce63cdb5500ab80485e4
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5c0cd9d4d4eb26e88d5f90eed6823e5f94040581f03708c992959bc0b84c560f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:dce0295becb66b2a3531818b76729e598e30825db8f70c059ee3261485288885
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 34.818748474121094,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-9135",
-  "epoch": 7.0,
   "eval_steps": 500,
-  "global_step": 9135,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -700,6 +700,105 @@
       "eval_samples_per_second": 26.469,
       "eval_steps_per_second": 3.326,
       "step": 9135
     }
   ],
   "logging_steps": 100,
@@ -714,7 +813,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -728,7 +827,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9851176321330176.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 34.818748474121094,
   "best_model_checkpoint": "/kaggle/working/output/checkpoint-9135",
+  "epoch": 8.0,
   "eval_steps": 500,
+  "global_step": 10440,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 26.469,
       "eval_steps_per_second": 3.326,
       "step": 9135
+    },
+    {
+      "epoch": 7.049808429118774,
+      "grad_norm": 3.117553472518921,
+      "learning_rate": 4.5595785440613034e-05,
+      "loss": 34.1218,
+      "step": 9200
+    },
+    {
+      "epoch": 7.126436781609195,
+      "grad_norm": 2.5572612285614014,
+      "learning_rate": 4.5547892720306515e-05,
+      "loss": 33.662,
+      "step": 9300
+    },
+    {
+      "epoch": 7.203065134099617,
+      "grad_norm": 3.5347042083740234,
+      "learning_rate": 4.55e-05,
+      "loss": 34.4668,
+      "step": 9400
+    },
+    {
+      "epoch": 7.2796934865900385,
+      "grad_norm": 1.9216647148132324,
+      "learning_rate": 4.545210727969349e-05,
+      "loss": 33.4468,
+      "step": 9500
+    },
+    {
+      "epoch": 7.35632183908046,
+      "grad_norm": 4.242152214050293,
+      "learning_rate": 4.5404214559386975e-05,
+      "loss": 33.5805,
+      "step": 9600
+    },
+    {
+      "epoch": 7.432950191570881,
+      "grad_norm": 2.9310567378997803,
+      "learning_rate": 4.535632183908046e-05,
+      "loss": 34.0603,
+      "step": 9700
+    },
+    {
+      "epoch": 7.509578544061303,
+      "grad_norm": 2.6573023796081543,
+      "learning_rate": 4.530842911877395e-05,
+      "loss": 33.8766,
+      "step": 9800
+    },
+    {
+      "epoch": 7.586206896551724,
+      "grad_norm": 2.7849409580230713,
+      "learning_rate": 4.5260536398467436e-05,
+      "loss": 33.6309,
+      "step": 9900
+    },
+    {
+      "epoch": 7.662835249042145,
+      "grad_norm": 2.7377357482910156,
+      "learning_rate": 4.521264367816092e-05,
+      "loss": 33.3621,
+      "step": 10000
+    },
+    {
+      "epoch": 7.739463601532567,
+      "grad_norm": 2.106233835220337,
+      "learning_rate": 4.516475095785441e-05,
+      "loss": 33.4172,
+      "step": 10100
+    },
+    {
+      "epoch": 7.816091954022989,
+      "grad_norm": 2.1989126205444336,
+      "learning_rate": 4.5116858237547896e-05,
+      "loss": 33.5937,
+      "step": 10200
+    },
+    {
+      "epoch": 7.89272030651341,
+      "grad_norm": 2.903721570968628,
+      "learning_rate": 4.5068965517241377e-05,
+      "loss": 33.7935,
+      "step": 10300
+    },
+    {
+      "epoch": 7.969348659003831,
+      "grad_norm": 2.061602830886841,
+      "learning_rate": 4.5021072796934863e-05,
+      "loss": 33.3289,
+      "step": 10400
+    },
+    {
+      "epoch": 8.0,
+      "eval_loss": 34.95075607299805,
+      "eval_runtime": 49.3237,
+      "eval_samples_per_second": 26.458,
+      "eval_steps_per_second": 3.325,
+      "step": 10440
     }
   ],
   "logging_steps": 100,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 1.1258487224377344e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null