Training in progress, step 4000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +294 -5

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:32009f61138d121877590a771ea7b59590e77167dcc592bddce1d3028adcb775
 size 151061672

 version https://git-lfs.github.com/spec/v1
+oid sha256:948197f12c590c0dda47ebc1917b48df09d620ce9166a3a391d7061f05435036
 size 151061672

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7b473282ad7d99494e8d3d04acf1a70c7329a2fe95cff6e4f1efcb04522160a9
 size 297616186

 version https://git-lfs.github.com/spec/v1
+oid sha256:8f16ff8a7474b79d86fe40d0ca22c7f7ed33fcee156c67270373e05fb5778508
 size 297616186

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e01a4900daa8e0003556007ff54cc9f8fc33170bd737bbc2836da3135ce6440
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1b63186d5b05381af62b3d7173fde5bc46daf7e9a7d51b3c424f534f7bee96e2
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d069d021e3ca87c952c1cbefefa02430980ec1efa75a58a8bd98140e99e3bc1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5638f21d0f0354e92b9a9f8dfa6255986109c390d6b1c6955ecb1e5acd3b9eab
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 64.5190036982578,
-  "best_model_checkpoint": "./checkpoint-3000",
-  "epoch": 15.015,
   "eval_steps": 1000,
-  "global_step": 3000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -874,6 +874,295 @@
       "eval_steps_per_second": 0.223,
       "eval_wer": 64.5190036982578,
       "step": 3000
     }
   ],
   "logging_steps": 25,
@@ -893,7 +1182,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.3541808896e+18,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 63.24466426057123,
+  "best_model_checkpoint": "./checkpoint-4000",
+  "epoch": 20.02,
   "eval_steps": 1000,
+  "global_step": 4000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_steps_per_second": 0.223,
       "eval_wer": 64.5190036982578,
       "step": 3000
+    },
+    {
+      "epoch": 15.02,
+      "grad_norm": 5.850539207458496,
+      "learning_rate": 4.395555555555556e-06,
+      "loss": 0.0914,
+      "step": 3025
+    },
+    {
+      "epoch": 15.025,
+      "grad_norm": 4.085538387298584,
+      "learning_rate": 4.34e-06,
+      "loss": 0.1051,
+      "step": 3050
+    },
+    {
+      "epoch": 15.03,
+      "grad_norm": 3.830420732498169,
+      "learning_rate": 4.284444444444445e-06,
+      "loss": 0.0627,
+      "step": 3075
+    },
+    {
+      "epoch": 15.035,
+      "grad_norm": 4.171704292297363,
+      "learning_rate": 4.228888888888889e-06,
+      "loss": 0.072,
+      "step": 3100
+    },
+    {
+      "epoch": 16.001,
+      "grad_norm": 6.426929950714111,
+      "learning_rate": 4.173333333333334e-06,
+      "loss": 0.0768,
+      "step": 3125
+    },
+    {
+      "epoch": 16.006,
+      "grad_norm": 2.9023313522338867,
+      "learning_rate": 4.117777777777779e-06,
+      "loss": 0.093,
+      "step": 3150
+    },
+    {
+      "epoch": 16.011,
+      "grad_norm": 3.1878390312194824,
+      "learning_rate": 4.062222222222223e-06,
+      "loss": 0.0799,
+      "step": 3175
+    },
+    {
+      "epoch": 16.016,
+      "grad_norm": 3.0205600261688232,
+      "learning_rate": 4.006666666666667e-06,
+      "loss": 0.0809,
+      "step": 3200
+    },
+    {
+      "epoch": 16.021,
+      "grad_norm": 4.082855701446533,
+      "learning_rate": 3.951111111111112e-06,
+      "loss": 0.0905,
+      "step": 3225
+    },
+    {
+      "epoch": 16.026,
+      "grad_norm": 3.3701670169830322,
+      "learning_rate": 3.895555555555556e-06,
+      "loss": 0.0874,
+      "step": 3250
+    },
+    {
+      "epoch": 16.031,
+      "grad_norm": 4.763386249542236,
+      "learning_rate": 3.8400000000000005e-06,
+      "loss": 0.0602,
+      "step": 3275
+    },
+    {
+      "epoch": 16.036,
+      "grad_norm": 3.008446216583252,
+      "learning_rate": 3.784444444444445e-06,
+      "loss": 0.0632,
+      "step": 3300
+    },
+    {
+      "epoch": 17.002,
+      "grad_norm": 4.296396732330322,
+      "learning_rate": 3.728888888888889e-06,
+      "loss": 0.0774,
+      "step": 3325
+    },
+    {
+      "epoch": 17.007,
+      "grad_norm": 2.4229137897491455,
+      "learning_rate": 3.673333333333334e-06,
+      "loss": 0.0821,
+      "step": 3350
+    },
+    {
+      "epoch": 17.012,
+      "grad_norm": 3.1641666889190674,
+      "learning_rate": 3.617777777777778e-06,
+      "loss": 0.0744,
+      "step": 3375
+    },
+    {
+      "epoch": 17.017,
+      "grad_norm": 3.2084319591522217,
+      "learning_rate": 3.5622222222222224e-06,
+      "loss": 0.0749,
+      "step": 3400
+    },
+    {
+      "epoch": 17.022,
+      "grad_norm": 3.579460620880127,
+      "learning_rate": 3.5066666666666673e-06,
+      "loss": 0.089,
+      "step": 3425
+    },
+    {
+      "epoch": 17.027,
+      "grad_norm": 4.045797824859619,
+      "learning_rate": 3.4511111111111113e-06,
+      "loss": 0.073,
+      "step": 3450
+    },
+    {
+      "epoch": 17.032,
+      "grad_norm": 4.456791400909424,
+      "learning_rate": 3.3955555555555558e-06,
+      "loss": 0.0558,
+      "step": 3475
+    },
+    {
+      "epoch": 17.037,
+      "grad_norm": 4.2783203125,
+      "learning_rate": 3.3400000000000006e-06,
+      "loss": 0.0571,
+      "step": 3500
+    },
+    {
+      "epoch": 18.003,
+      "grad_norm": 2.9448039531707764,
+      "learning_rate": 3.2844444444444447e-06,
+      "loss": 0.0781,
+      "step": 3525
+    },
+    {
+      "epoch": 18.008,
+      "grad_norm": 2.74501371383667,
+      "learning_rate": 3.228888888888889e-06,
+      "loss": 0.0748,
+      "step": 3550
+    },
+    {
+      "epoch": 18.013,
+      "grad_norm": 2.291656494140625,
+      "learning_rate": 3.173333333333334e-06,
+      "loss": 0.0692,
+      "step": 3575
+    },
+    {
+      "epoch": 18.018,
+      "grad_norm": 2.8000614643096924,
+      "learning_rate": 3.117777777777778e-06,
+      "loss": 0.0696,
+      "step": 3600
+    },
+    {
+      "epoch": 18.023,
+      "grad_norm": 3.9665687084198,
+      "learning_rate": 3.0622222222222225e-06,
+      "loss": 0.0883,
+      "step": 3625
+    },
+    {
+      "epoch": 18.028,
+      "grad_norm": 3.8955137729644775,
+      "learning_rate": 3.0066666666666674e-06,
+      "loss": 0.063,
+      "step": 3650
+    },
+    {
+      "epoch": 18.033,
+      "grad_norm": 3.781052827835083,
+      "learning_rate": 2.9511111111111114e-06,
+      "loss": 0.0479,
+      "step": 3675
+    },
+    {
+      "epoch": 18.038,
+      "grad_norm": 3.0369510650634766,
+      "learning_rate": 2.895555555555556e-06,
+      "loss": 0.0542,
+      "step": 3700
+    },
+    {
+      "epoch": 19.004,
+      "grad_norm": 3.2044124603271484,
+      "learning_rate": 2.84e-06,
+      "loss": 0.0774,
+      "step": 3725
+    },
+    {
+      "epoch": 19.009,
+      "grad_norm": 3.152061939239502,
+      "learning_rate": 2.784444444444445e-06,
+      "loss": 0.0698,
+      "step": 3750
+    },
+    {
+      "epoch": 19.014,
+      "grad_norm": 2.7949397563934326,
+      "learning_rate": 2.7288888888888893e-06,
+      "loss": 0.0661,
+      "step": 3775
+    },
+    {
+      "epoch": 19.019,
+      "grad_norm": 2.393399477005005,
+      "learning_rate": 2.6733333333333333e-06,
+      "loss": 0.0644,
+      "step": 3800
+    },
+    {
+      "epoch": 19.024,
+      "grad_norm": 3.772813558578491,
+      "learning_rate": 2.617777777777778e-06,
+      "loss": 0.0869,
+      "step": 3825
+    },
+    {
+      "epoch": 19.029,
+      "grad_norm": 3.960970401763916,
+      "learning_rate": 2.5622222222222226e-06,
+      "loss": 0.0521,
+      "step": 3850
+    },
+    {
+      "epoch": 19.034,
+      "grad_norm": 4.602725505828857,
+      "learning_rate": 2.5066666666666667e-06,
+      "loss": 0.0473,
+      "step": 3875
+    },
+    {
+      "epoch": 19.039,
+      "grad_norm": 0.9137289524078369,
+      "learning_rate": 2.451111111111111e-06,
+      "loss": 0.0497,
+      "step": 3900
+    },
+    {
+      "epoch": 20.005,
+      "grad_norm": 3.5569727420806885,
+      "learning_rate": 2.3955555555555556e-06,
+      "loss": 0.0774,
+      "step": 3925
+    },
+    {
+      "epoch": 20.01,
+      "grad_norm": 2.2422502040863037,
+      "learning_rate": 2.3400000000000005e-06,
+      "loss": 0.0629,
+      "step": 3950
+    },
+    {
+      "epoch": 20.015,
+      "grad_norm": 2.4179162979125977,
+      "learning_rate": 2.2844444444444445e-06,
+      "loss": 0.0637,
+      "step": 3975
+    },
+    {
+      "epoch": 20.02,
+      "grad_norm": 3.352149724960327,
+      "learning_rate": 2.228888888888889e-06,
+      "loss": 0.0655,
+      "step": 4000
+    },
+    {
+      "epoch": 20.02,
+      "eval_loss": 0.20272822678089142,
+      "eval_runtime": 353.3869,
+      "eval_samples_per_second": 1.803,
+      "eval_steps_per_second": 0.226,
+      "eval_wer": 63.24466426057123,
+      "step": 4000
     }
   ],
   "logging_steps": 25,
       "attributes": {}
     }
   },
+  "total_flos": 3.1389078528e+18,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null