Training in progress, step 5400, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +148 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ce8cb0c1636a06b1e76c546c4b0282f02b71ccad43c283d3d33d43185c64edec
 size 527048968

 version https://git-lfs.github.com/spec/v1
+oid sha256:d4eaaf03b78bd99375228f4e3780fd0588fc02582773008769bf7177550d4b48
 size 527048968

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:70afac3958025bed818f692a236ab6bf6b28db45140796294f475309149762a5
 size 1054135994

 version https://git-lfs.github.com/spec/v1
+oid sha256:87baf48f45b21dd7d9a1576417255bf546e8558ecd18cd75468fb9ffa32e54f8
 size 1054135994

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7de188e422eb0da886da3c865f1df00995a0a219ebff0d43a41d74c3b9d38d5
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:cde3dd12f91204388f748ef22c42d0af6362a11af96ae2767080c430a3556fd7
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7cc08ef7615af7896731786745ca416272561837649d6bc1ff644d72a48c9b0d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0d8a06f6e764a4c806b3b6aa6930ec3c05d14769ecbf5db87f5122a0c04e591e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.7166205048561096,
   "best_model_checkpoint": "./output/checkpoint-450",
-  "epoch": 228.2608695652174,
   "eval_steps": 150,
-  "global_step": 5250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5082,6 +5082,151 @@
       "EMA_steps_per_second": 25.783,
       "epoch": 228.2608695652174,
       "step": 5250
     }
   ],
   "logging_steps": 10,
@@ -5101,7 +5246,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.3495580841170534e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.7166205048561096,
   "best_model_checkpoint": "./output/checkpoint-450",
+  "epoch": 234.7826086956522,
   "eval_steps": 150,
+  "global_step": 5400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "EMA_steps_per_second": 25.783,
       "epoch": 228.2608695652174,
       "step": 5250
+    },
+    {
+      "epoch": 228.69565217391303,
+      "grad_norm": 3.2444028854370117,
+      "learning_rate": 1.085552546442246e-07,
+      "loss": 0.2128,
+      "step": 5260
+    },
+    {
+      "epoch": 229.1304347826087,
+      "grad_norm": 2.6189353466033936,
+      "learning_rate": 2.171105092884492e-07,
+      "loss": 0.2259,
+      "step": 5270
+    },
+    {
+      "epoch": 229.56521739130434,
+      "grad_norm": 2.1571950912475586,
+      "learning_rate": 3.2566576393267376e-07,
+      "loss": 0.2638,
+      "step": 5280
+    },
+    {
+      "epoch": 230.0,
+      "grad_norm": 2.62457013130188,
+      "learning_rate": 4.342210185768984e-07,
+      "loss": 0.2062,
+      "step": 5290
+    },
+    {
+      "epoch": 230.43478260869566,
+      "grad_norm": 1.7154628038406372,
+      "learning_rate": 5.42776273221123e-07,
+      "loss": 0.2114,
+      "step": 5300
+    },
+    {
+      "epoch": 230.8695652173913,
+      "grad_norm": 2.018242835998535,
+      "learning_rate": 6.513315278653475e-07,
+      "loss": 0.24,
+      "step": 5310
+    },
+    {
+      "epoch": 231.30434782608697,
+      "grad_norm": 2.380286693572998,
+      "learning_rate": 7.598867825095721e-07,
+      "loss": 0.214,
+      "step": 5320
+    },
+    {
+      "epoch": 231.7391304347826,
+      "grad_norm": 1.5191930532455444,
+      "learning_rate": 8.684420371537968e-07,
+      "loss": 0.2008,
+      "step": 5330
+    },
+    {
+      "epoch": 232.17391304347825,
+      "grad_norm": 2.8159291744232178,
+      "learning_rate": 9.769972917980214e-07,
+      "loss": 0.2521,
+      "step": 5340
+    },
+    {
+      "epoch": 232.6086956521739,
+      "grad_norm": 2.023869752883911,
+      "learning_rate": 1.085552546442246e-06,
+      "loss": 0.2095,
+      "step": 5350
+    },
+    {
+      "epoch": 233.04347826086956,
+      "grad_norm": 1.9053815603256226,
+      "learning_rate": 1.0855524388726596e-06,
+      "loss": 0.2192,
+      "step": 5360
+    },
+    {
+      "epoch": 233.47826086956522,
+      "grad_norm": 2.135075092315674,
+      "learning_rate": 1.0855521161639428e-06,
+      "loss": 0.2301,
+      "step": 5370
+    },
+    {
+      "epoch": 233.91304347826087,
+      "grad_norm": 2.302385091781616,
+      "learning_rate": 1.0855515783162238e-06,
+      "loss": 0.2402,
+      "step": 5380
+    },
+    {
+      "epoch": 234.34782608695653,
+      "grad_norm": 2.3211662769317627,
+      "learning_rate": 1.0855508253297159e-06,
+      "loss": 0.2156,
+      "step": 5390
+    },
+    {
+      "epoch": 234.7826086956522,
+      "grad_norm": 2.5585744380950928,
+      "learning_rate": 1.0855498572047172e-06,
+      "loss": 0.2164,
+      "step": 5400
+    },
+    {
+      "epoch": 234.7826086956522,
+      "eval_loss": 0.9845991134643555,
+      "eval_runtime": 0.3916,
+      "eval_samples_per_second": 25.539,
+      "eval_steps_per_second": 25.539,
+      "step": 5400
+    },
+    {
+      "Start_State_loss": 0.8609819412231445,
+      "Start_State_runtime": 0.3948,
+      "Start_State_samples_per_second": 25.328,
+      "Start_State_steps_per_second": 25.328,
+      "epoch": 234.7826086956522,
+      "step": 5400
+    },
+    {
+      "Raw_Model_loss": 0.9845991134643555,
+      "Raw_Model_runtime": 0.3961,
+      "Raw_Model_samples_per_second": 25.244,
+      "Raw_Model_steps_per_second": 25.244,
+      "epoch": 234.7826086956522,
+      "step": 5400
+    },
+    {
+      "SWA_loss": 0.8302789926528931,
+      "SWA_runtime": 0.3853,
+      "SWA_samples_per_second": 25.953,
+      "SWA_steps_per_second": 25.953,
+      "epoch": 234.7826086956522,
+      "step": 5400
+    },
+    {
+      "EMA_loss": 0.8594452142715454,
+      "EMA_runtime": 0.3912,
+      "EMA_samples_per_second": 25.563,
+      "EMA_steps_per_second": 25.563,
+      "epoch": 234.7826086956522,
+      "step": 5400
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.3884011525792563e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null