| { |
| "best_metric": 0.0857774206508638, |
| "best_model_checkpoint": "PhoWhisper-small-vispeech-classifier-v3/checkpoint-392", |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 784, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.012755102040816327, |
| "grad_norm": 48589.94921875, |
| "learning_rate": 6.377551020408164e-08, |
| "loss": 2.0884, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.025510204081632654, |
| "grad_norm": 50532.125, |
| "learning_rate": 1.2755102040816328e-07, |
| "loss": 2.0887, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03826530612244898, |
| "grad_norm": 50246.4296875, |
| "learning_rate": 1.913265306122449e-07, |
| "loss": 2.0885, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.05102040816326531, |
| "grad_norm": 57370.44140625, |
| "learning_rate": 2.5510204081632656e-07, |
| "loss": 2.0908, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06377551020408163, |
| "grad_norm": 40133.53125, |
| "learning_rate": 3.188775510204082e-07, |
| "loss": 2.0889, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.07653061224489796, |
| "grad_norm": 58377.3671875, |
| "learning_rate": 3.826530612244898e-07, |
| "loss": 2.0874, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.08928571428571429, |
| "grad_norm": 50888.7421875, |
| "learning_rate": 4.4642857142857147e-07, |
| "loss": 2.088, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.10204081632653061, |
| "grad_norm": 44107.75390625, |
| "learning_rate": 5.102040816326531e-07, |
| "loss": 2.0872, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.11479591836734694, |
| "grad_norm": 62939.18359375, |
| "learning_rate": 5.739795918367347e-07, |
| "loss": 2.0873, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.12755102040816327, |
| "grad_norm": 42077.53515625, |
| "learning_rate": 6.377551020408164e-07, |
| "loss": 2.0852, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.14030612244897958, |
| "grad_norm": 50233.00390625, |
| "learning_rate": 7.015306122448979e-07, |
| "loss": 2.0861, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.15306122448979592, |
| "grad_norm": 60839.4765625, |
| "learning_rate": 7.653061224489796e-07, |
| "loss": 2.0856, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.16581632653061223, |
| "grad_norm": 57499.3515625, |
| "learning_rate": 8.290816326530612e-07, |
| "loss": 2.0838, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.17857142857142858, |
| "grad_norm": 65030.765625, |
| "learning_rate": 8.928571428571429e-07, |
| "loss": 2.083, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.1913265306122449, |
| "grad_norm": 53192.35546875, |
| "learning_rate": 9.566326530612244e-07, |
| "loss": 2.0841, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.20408163265306123, |
| "grad_norm": 52991.80078125, |
| "learning_rate": 1.0204081632653063e-06, |
| "loss": 2.0817, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.21683673469387754, |
| "grad_norm": 48656.234375, |
| "learning_rate": 1.0841836734693879e-06, |
| "loss": 2.0813, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.22959183673469388, |
| "grad_norm": 63459.2421875, |
| "learning_rate": 1.1479591836734695e-06, |
| "loss": 2.081, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2423469387755102, |
| "grad_norm": 47800.3515625, |
| "learning_rate": 1.211734693877551e-06, |
| "loss": 2.0775, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.25510204081632654, |
| "grad_norm": 64353.37890625, |
| "learning_rate": 1.2755102040816329e-06, |
| "loss": 2.0772, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.26785714285714285, |
| "grad_norm": 59652.29296875, |
| "learning_rate": 1.3392857142857143e-06, |
| "loss": 2.0779, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.28061224489795916, |
| "grad_norm": 44752.69140625, |
| "learning_rate": 1.4030612244897959e-06, |
| "loss": 2.0764, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.29336734693877553, |
| "grad_norm": 49883.0, |
| "learning_rate": 1.4668367346938777e-06, |
| "loss": 2.0763, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.30612244897959184, |
| "grad_norm": 65950.28125, |
| "learning_rate": 1.5306122448979593e-06, |
| "loss": 2.0739, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.31887755102040816, |
| "grad_norm": 61354.83203125, |
| "learning_rate": 1.5943877551020409e-06, |
| "loss": 2.0706, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.33163265306122447, |
| "grad_norm": 53169.91796875, |
| "learning_rate": 1.6581632653061225e-06, |
| "loss": 2.0709, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.34438775510204084, |
| "grad_norm": 55883.31640625, |
| "learning_rate": 1.7219387755102043e-06, |
| "loss": 2.0695, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.35714285714285715, |
| "grad_norm": 38134.765625, |
| "learning_rate": 1.7857142857142859e-06, |
| "loss": 2.0675, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.36989795918367346, |
| "grad_norm": 53920.7265625, |
| "learning_rate": 1.8494897959183675e-06, |
| "loss": 2.0659, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.3826530612244898, |
| "grad_norm": 68777.28125, |
| "learning_rate": 1.913265306122449e-06, |
| "loss": 2.0638, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.39540816326530615, |
| "grad_norm": 46247.8984375, |
| "learning_rate": 1.977040816326531e-06, |
| "loss": 2.0656, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.40816326530612246, |
| "grad_norm": 49636.04296875, |
| "learning_rate": 2.0408163265306125e-06, |
| "loss": 2.0621, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.42091836734693877, |
| "grad_norm": 51880.3515625, |
| "learning_rate": 2.104591836734694e-06, |
| "loss": 2.057, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.4336734693877551, |
| "grad_norm": 52808.734375, |
| "learning_rate": 2.1683673469387757e-06, |
| "loss": 2.0585, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.44642857142857145, |
| "grad_norm": 46398.125, |
| "learning_rate": 2.2321428571428573e-06, |
| "loss": 2.0505, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.45918367346938777, |
| "grad_norm": 59347.2578125, |
| "learning_rate": 2.295918367346939e-06, |
| "loss": 2.0487, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.4719387755102041, |
| "grad_norm": 58343.73828125, |
| "learning_rate": 2.3596938775510205e-06, |
| "loss": 2.0563, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.4846938775510204, |
| "grad_norm": 43836.42578125, |
| "learning_rate": 2.423469387755102e-06, |
| "loss": 2.0486, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.49744897959183676, |
| "grad_norm": 69661.5859375, |
| "learning_rate": 2.487244897959184e-06, |
| "loss": 2.0443, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.5102040816326531, |
| "grad_norm": 53888.24609375, |
| "learning_rate": 2.5510204081632657e-06, |
| "loss": 2.0475, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5229591836734694, |
| "grad_norm": 50369.1640625, |
| "learning_rate": 2.6147959183673473e-06, |
| "loss": 2.035, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.5357142857142857, |
| "grad_norm": 62733.9609375, |
| "learning_rate": 2.6785714285714285e-06, |
| "loss": 2.0354, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.548469387755102, |
| "grad_norm": 48340.67578125, |
| "learning_rate": 2.74234693877551e-06, |
| "loss": 2.0373, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.5612244897959183, |
| "grad_norm": 50353.734375, |
| "learning_rate": 2.8061224489795917e-06, |
| "loss": 2.0389, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.5739795918367347, |
| "grad_norm": 62351.765625, |
| "learning_rate": 2.869897959183674e-06, |
| "loss": 2.0263, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.5867346938775511, |
| "grad_norm": 73855.609375, |
| "learning_rate": 2.9336734693877553e-06, |
| "loss": 2.0269, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5994897959183674, |
| "grad_norm": 84417.859375, |
| "learning_rate": 2.997448979591837e-06, |
| "loss": 2.0311, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.6122448979591837, |
| "grad_norm": 57347.55078125, |
| "learning_rate": 3.0612244897959185e-06, |
| "loss": 2.0239, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 65611.1875, |
| "learning_rate": 3.125e-06, |
| "loss": 2.0064, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.6377551020408163, |
| "grad_norm": 70810.75, |
| "learning_rate": 3.1887755102040818e-06, |
| "loss": 2.0224, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.6505102040816326, |
| "grad_norm": 46479.8828125, |
| "learning_rate": 3.2525510204081634e-06, |
| "loss": 2.0057, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.6632653061224489, |
| "grad_norm": 34124.89453125, |
| "learning_rate": 3.316326530612245e-06, |
| "loss": 2.0135, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.6760204081632653, |
| "grad_norm": 53196.58203125, |
| "learning_rate": 3.3801020408163266e-06, |
| "loss": 2.0087, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.6887755102040817, |
| "grad_norm": 64588.515625, |
| "learning_rate": 3.4438775510204086e-06, |
| "loss": 2.0006, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.701530612244898, |
| "grad_norm": 73780.546875, |
| "learning_rate": 3.50765306122449e-06, |
| "loss": 1.9965, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 63101.05859375, |
| "learning_rate": 3.5714285714285718e-06, |
| "loss": 2.0005, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.7270408163265306, |
| "grad_norm": 78616.109375, |
| "learning_rate": 3.6352040816326534e-06, |
| "loss": 1.9904, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.7397959183673469, |
| "grad_norm": 79120.6484375, |
| "learning_rate": 3.698979591836735e-06, |
| "loss": 1.9891, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.7525510204081632, |
| "grad_norm": 92711.09375, |
| "learning_rate": 3.7627551020408166e-06, |
| "loss": 1.9611, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.7653061224489796, |
| "grad_norm": 77935.4765625, |
| "learning_rate": 3.826530612244898e-06, |
| "loss": 1.9746, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.7780612244897959, |
| "grad_norm": 85825.375, |
| "learning_rate": 3.89030612244898e-06, |
| "loss": 1.9753, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.7908163265306123, |
| "grad_norm": 65178.671875, |
| "learning_rate": 3.954081632653062e-06, |
| "loss": 1.9534, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.8035714285714286, |
| "grad_norm": 71973.671875, |
| "learning_rate": 4.017857142857143e-06, |
| "loss": 1.9474, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.8163265306122449, |
| "grad_norm": 88983.125, |
| "learning_rate": 4.081632653061225e-06, |
| "loss": 1.9492, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.8290816326530612, |
| "grad_norm": 76345.84375, |
| "learning_rate": 4.145408163265306e-06, |
| "loss": 1.9513, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.8418367346938775, |
| "grad_norm": 64801.640625, |
| "learning_rate": 4.209183673469388e-06, |
| "loss": 1.9583, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.8545918367346939, |
| "grad_norm": 72903.8203125, |
| "learning_rate": 4.272959183673469e-06, |
| "loss": 1.9579, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.8673469387755102, |
| "grad_norm": 65284.796875, |
| "learning_rate": 4.336734693877551e-06, |
| "loss": 1.9272, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.8801020408163265, |
| "grad_norm": 65237.38671875, |
| "learning_rate": 4.400510204081633e-06, |
| "loss": 1.9065, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.8928571428571429, |
| "grad_norm": 79606.9296875, |
| "learning_rate": 4.464285714285715e-06, |
| "loss": 1.9013, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.9056122448979592, |
| "grad_norm": 76137.6328125, |
| "learning_rate": 4.528061224489797e-06, |
| "loss": 1.8892, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.9183673469387755, |
| "grad_norm": 40717.60546875, |
| "learning_rate": 4.591836734693878e-06, |
| "loss": 1.9072, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.9311224489795918, |
| "grad_norm": 110416.125, |
| "learning_rate": 4.65561224489796e-06, |
| "loss": 1.9024, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.9438775510204082, |
| "grad_norm": 99101.6875, |
| "learning_rate": 4.719387755102041e-06, |
| "loss": 1.8979, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.9566326530612245, |
| "grad_norm": 86707.7109375, |
| "learning_rate": 4.783163265306123e-06, |
| "loss": 1.878, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.9693877551020408, |
| "grad_norm": 85074.75, |
| "learning_rate": 4.846938775510204e-06, |
| "loss": 1.871, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.9821428571428571, |
| "grad_norm": 47668.7109375, |
| "learning_rate": 4.910714285714286e-06, |
| "loss": 1.8762, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.9948979591836735, |
| "grad_norm": 133846.6875, |
| "learning_rate": 4.974489795918368e-06, |
| "loss": 1.8344, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.0857774206508638, |
| "eval_loss": 1.9574071168899536, |
| "eval_runtime": 313.0247, |
| "eval_samples_per_second": 15.903, |
| "eval_steps_per_second": 0.53, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.0076530612244898, |
| "grad_norm": 95410.25, |
| "learning_rate": 5.0382653061224495e-06, |
| "loss": 1.8635, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.0204081632653061, |
| "grad_norm": 86022.1484375, |
| "learning_rate": 5.1020408163265315e-06, |
| "loss": 1.8147, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.0331632653061225, |
| "grad_norm": 154860.171875, |
| "learning_rate": 5.165816326530613e-06, |
| "loss": 1.8016, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.0459183673469388, |
| "grad_norm": 105390.4609375, |
| "learning_rate": 5.229591836734695e-06, |
| "loss": 1.8458, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.058673469387755, |
| "grad_norm": 94511.6171875, |
| "learning_rate": 5.293367346938776e-06, |
| "loss": 1.7888, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.0714285714285714, |
| "grad_norm": 93031.234375, |
| "learning_rate": 5.357142857142857e-06, |
| "loss": 1.7809, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.0841836734693877, |
| "grad_norm": 71911.4296875, |
| "learning_rate": 5.420918367346939e-06, |
| "loss": 1.8523, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.096938775510204, |
| "grad_norm": 59985.96875, |
| "learning_rate": 5.48469387755102e-06, |
| "loss": 1.7427, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.1096938775510203, |
| "grad_norm": 86531.875, |
| "learning_rate": 5.548469387755102e-06, |
| "loss": 1.7646, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.1224489795918366, |
| "grad_norm": 90977.859375, |
| "learning_rate": 5.6122448979591834e-06, |
| "loss": 1.7529, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.135204081632653, |
| "grad_norm": 70691.375, |
| "learning_rate": 5.6760204081632655e-06, |
| "loss": 1.7754, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.1479591836734695, |
| "grad_norm": 137077.8125, |
| "learning_rate": 5.739795918367348e-06, |
| "loss": 1.73, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.1607142857142858, |
| "grad_norm": 133976.8125, |
| "learning_rate": 5.8035714285714295e-06, |
| "loss": 1.7906, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.1734693877551021, |
| "grad_norm": 84754.7421875, |
| "learning_rate": 5.867346938775511e-06, |
| "loss": 1.7722, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.1862244897959184, |
| "grad_norm": 67924.859375, |
| "learning_rate": 5.931122448979593e-06, |
| "loss": 1.8274, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.1989795918367347, |
| "grad_norm": 110309.015625, |
| "learning_rate": 5.994897959183674e-06, |
| "loss": 1.7558, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.211734693877551, |
| "grad_norm": 108489.703125, |
| "learning_rate": 6.058673469387756e-06, |
| "loss": 1.7387, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.2244897959183674, |
| "grad_norm": 99533.59375, |
| "learning_rate": 6.122448979591837e-06, |
| "loss": 1.7367, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.2372448979591837, |
| "grad_norm": 85590.296875, |
| "learning_rate": 6.186224489795919e-06, |
| "loss": 1.7304, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 242811.53125, |
| "learning_rate": 6.25e-06, |
| "loss": 1.6764, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.2627551020408163, |
| "grad_norm": 88326.6171875, |
| "learning_rate": 6.313775510204082e-06, |
| "loss": 1.7684, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.2755102040816326, |
| "grad_norm": 64742.67578125, |
| "learning_rate": 6.3775510204081635e-06, |
| "loss": 1.788, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.288265306122449, |
| "grad_norm": 130764.1015625, |
| "learning_rate": 6.4413265306122455e-06, |
| "loss": 1.6374, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.3010204081632653, |
| "grad_norm": 105262.7578125, |
| "learning_rate": 6.505102040816327e-06, |
| "loss": 1.7669, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.3137755102040816, |
| "grad_norm": 132783.3125, |
| "learning_rate": 6.568877551020409e-06, |
| "loss": 1.7428, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.3265306122448979, |
| "grad_norm": 109851.921875, |
| "learning_rate": 6.63265306122449e-06, |
| "loss": 1.7023, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.3392857142857144, |
| "grad_norm": 65197.0859375, |
| "learning_rate": 6.696428571428571e-06, |
| "loss": 1.7407, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.3520408163265305, |
| "grad_norm": 158283.9375, |
| "learning_rate": 6.760204081632653e-06, |
| "loss": 1.7629, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.364795918367347, |
| "grad_norm": 121252.4140625, |
| "learning_rate": 6.823979591836736e-06, |
| "loss": 1.7242, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.3775510204081631, |
| "grad_norm": 110898.328125, |
| "learning_rate": 6.887755102040817e-06, |
| "loss": 1.6922, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.3903061224489797, |
| "grad_norm": 127766.8046875, |
| "learning_rate": 6.951530612244899e-06, |
| "loss": 1.7274, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.403061224489796, |
| "grad_norm": 138821.59375, |
| "learning_rate": 7.01530612244898e-06, |
| "loss": 1.6859, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.4158163265306123, |
| "grad_norm": 154254.75, |
| "learning_rate": 7.079081632653062e-06, |
| "loss": 1.6988, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 122805.65625, |
| "learning_rate": 7.1428571428571436e-06, |
| "loss": 1.7003, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.441326530612245, |
| "grad_norm": 35300.796875, |
| "learning_rate": 7.206632653061226e-06, |
| "loss": 1.7224, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.4540816326530612, |
| "grad_norm": 125603.1015625, |
| "learning_rate": 7.270408163265307e-06, |
| "loss": 1.8201, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.4668367346938775, |
| "grad_norm": 167185.671875, |
| "learning_rate": 7.334183673469388e-06, |
| "loss": 1.7959, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.4795918367346939, |
| "grad_norm": 59943.359375, |
| "learning_rate": 7.39795918367347e-06, |
| "loss": 1.6133, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.4923469387755102, |
| "grad_norm": 197770.234375, |
| "learning_rate": 7.461734693877551e-06, |
| "loss": 1.709, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.5051020408163265, |
| "grad_norm": 182674.5, |
| "learning_rate": 7.525510204081633e-06, |
| "loss": 1.688, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.5178571428571428, |
| "grad_norm": 90262.0390625, |
| "learning_rate": 7.589285714285714e-06, |
| "loss": 1.7639, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.5306122448979593, |
| "grad_norm": 119569.0859375, |
| "learning_rate": 7.653061224489796e-06, |
| "loss": 1.7587, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.5433673469387754, |
| "grad_norm": 134583.546875, |
| "learning_rate": 7.716836734693878e-06, |
| "loss": 1.7174, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.556122448979592, |
| "grad_norm": 123799.515625, |
| "learning_rate": 7.78061224489796e-06, |
| "loss": 1.668, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.568877551020408, |
| "grad_norm": 124765.65625, |
| "learning_rate": 7.844387755102042e-06, |
| "loss": 1.691, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.5816326530612246, |
| "grad_norm": 180459.28125, |
| "learning_rate": 7.908163265306124e-06, |
| "loss": 1.6133, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.5943877551020407, |
| "grad_norm": 115367.0859375, |
| "learning_rate": 7.971938775510205e-06, |
| "loss": 1.7529, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.6071428571428572, |
| "grad_norm": 66548.40625, |
| "learning_rate": 8.035714285714286e-06, |
| "loss": 1.7157, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.6198979591836735, |
| "grad_norm": 152392.640625, |
| "learning_rate": 8.099489795918369e-06, |
| "loss": 1.7417, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.6326530612244898, |
| "grad_norm": 131193.015625, |
| "learning_rate": 8.16326530612245e-06, |
| "loss": 1.7704, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.6454081632653061, |
| "grad_norm": 154705.71875, |
| "learning_rate": 8.227040816326531e-06, |
| "loss": 1.6573, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.6581632653061225, |
| "grad_norm": 113254.484375, |
| "learning_rate": 8.290816326530612e-06, |
| "loss": 1.6394, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.6709183673469388, |
| "grad_norm": 163821.484375, |
| "learning_rate": 8.354591836734695e-06, |
| "loss": 1.6884, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.683673469387755, |
| "grad_norm": 141981.828125, |
| "learning_rate": 8.418367346938776e-06, |
| "loss": 1.5955, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.6964285714285714, |
| "grad_norm": 158287.6875, |
| "learning_rate": 8.482142857142858e-06, |
| "loss": 1.6885, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.7091836734693877, |
| "grad_norm": 166876.4375, |
| "learning_rate": 8.545918367346939e-06, |
| "loss": 1.7253, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.7219387755102042, |
| "grad_norm": 111882.171875, |
| "learning_rate": 8.609693877551022e-06, |
| "loss": 1.7057, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.7346938775510203, |
| "grad_norm": 237840.828125, |
| "learning_rate": 8.673469387755103e-06, |
| "loss": 1.7132, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.7474489795918369, |
| "grad_norm": 83344.59375, |
| "learning_rate": 8.737244897959184e-06, |
| "loss": 1.733, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.760204081632653, |
| "grad_norm": 30312.462890625, |
| "learning_rate": 8.801020408163265e-06, |
| "loss": 1.7926, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.7729591836734695, |
| "grad_norm": 89868.9609375, |
| "learning_rate": 8.864795918367348e-06, |
| "loss": 1.6389, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.7857142857142856, |
| "grad_norm": 182723.09375, |
| "learning_rate": 8.92857142857143e-06, |
| "loss": 1.7533, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.7984693877551021, |
| "grad_norm": 104328.2421875, |
| "learning_rate": 8.992346938775512e-06, |
| "loss": 1.7424, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.8112244897959182, |
| "grad_norm": 110120.609375, |
| "learning_rate": 9.056122448979593e-06, |
| "loss": 1.6961, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.8239795918367347, |
| "grad_norm": 114349.7578125, |
| "learning_rate": 9.119897959183674e-06, |
| "loss": 1.7753, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.836734693877551, |
| "grad_norm": 103603.8828125, |
| "learning_rate": 9.183673469387756e-06, |
| "loss": 1.777, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.8494897959183674, |
| "grad_norm": 173858.828125, |
| "learning_rate": 9.247448979591837e-06, |
| "loss": 1.7218, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.8622448979591837, |
| "grad_norm": 255108.96875, |
| "learning_rate": 9.31122448979592e-06, |
| "loss": 1.7319, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.875, |
| "grad_norm": 38653.734375, |
| "learning_rate": 9.375000000000001e-06, |
| "loss": 1.6574, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.8877551020408163, |
| "grad_norm": 141534.234375, |
| "learning_rate": 9.438775510204082e-06, |
| "loss": 1.7595, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.9005102040816326, |
| "grad_norm": 182591.890625, |
| "learning_rate": 9.502551020408163e-06, |
| "loss": 1.6796, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.913265306122449, |
| "grad_norm": 75052.21875, |
| "learning_rate": 9.566326530612246e-06, |
| "loss": 1.754, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.9260204081632653, |
| "grad_norm": 108983.921875, |
| "learning_rate": 9.630102040816327e-06, |
| "loss": 1.6907, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.9387755102040818, |
| "grad_norm": 110641.828125, |
| "learning_rate": 9.693877551020408e-06, |
| "loss": 1.7041, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.9515306122448979, |
| "grad_norm": 119189.8203125, |
| "learning_rate": 9.75765306122449e-06, |
| "loss": 1.6054, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.9642857142857144, |
| "grad_norm": 62365.9453125, |
| "learning_rate": 9.821428571428573e-06, |
| "loss": 1.6862, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.9770408163265305, |
| "grad_norm": 103043.6171875, |
| "learning_rate": 9.885204081632654e-06, |
| "loss": 1.6596, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.989795918367347, |
| "grad_norm": 117368.546875, |
| "learning_rate": 9.948979591836737e-06, |
| "loss": 1.6737, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.0857774206508638, |
| "eval_loss": 1.8835231065750122, |
| "eval_runtime": 308.4538, |
| "eval_samples_per_second": 16.139, |
| "eval_steps_per_second": 0.538, |
| "step": 784 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 3920, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.99358138673152e+18, |
| "train_batch_size": 30, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|