| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 98.61538461538461, |
| "eval_steps": 100, |
| "global_step": 7100, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.3916083916083917, |
| "grad_norm": 0.7105618119239807, |
| "learning_rate": 0.000285, |
| "loss": 11.1682, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.3916083916083917, |
| "eval_cer": 0.9903044405662207, |
| "eval_loss": 3.855257987976074, |
| "eval_runtime": 6.2271, |
| "eval_samples_per_second": 45.768, |
| "eval_steps_per_second": 5.781, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.7832167832167833, |
| "grad_norm": 1.5593620538711548, |
| "learning_rate": 0.0002959285714285714, |
| "loss": 3.9311, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.7832167832167833, |
| "eval_cer": 0.9903044405662207, |
| "eval_loss": 3.8240556716918945, |
| "eval_runtime": 6.1912, |
| "eval_samples_per_second": 46.033, |
| "eval_steps_per_second": 5.815, |
| "step": 200 |
| }, |
| { |
| "epoch": 4.1678321678321675, |
| "grad_norm": 1.5898423194885254, |
| "learning_rate": 0.0002916428571428571, |
| "loss": 3.8623, |
| "step": 300 |
| }, |
| { |
| "epoch": 4.1678321678321675, |
| "eval_cer": 0.9903044405662207, |
| "eval_loss": 3.7760262489318848, |
| "eval_runtime": 6.1779, |
| "eval_samples_per_second": 46.132, |
| "eval_steps_per_second": 5.827, |
| "step": 300 |
| }, |
| { |
| "epoch": 5.559440559440559, |
| "grad_norm": 2.1614158153533936, |
| "learning_rate": 0.00028735714285714286, |
| "loss": 3.7693, |
| "step": 400 |
| }, |
| { |
| "epoch": 5.559440559440559, |
| "eval_cer": 0.9903044405662207, |
| "eval_loss": 3.668625831604004, |
| "eval_runtime": 6.1599, |
| "eval_samples_per_second": 46.267, |
| "eval_steps_per_second": 5.844, |
| "step": 400 |
| }, |
| { |
| "epoch": 6.951048951048951, |
| "grad_norm": 1.3560220003128052, |
| "learning_rate": 0.00028307142857142854, |
| "loss": 3.671, |
| "step": 500 |
| }, |
| { |
| "epoch": 6.951048951048951, |
| "eval_cer": 0.9893348846228427, |
| "eval_loss": 3.590017080307007, |
| "eval_runtime": 6.1396, |
| "eval_samples_per_second": 46.42, |
| "eval_steps_per_second": 5.864, |
| "step": 500 |
| }, |
| { |
| "epoch": 8.335664335664335, |
| "grad_norm": 1.1032301187515259, |
| "learning_rate": 0.00027878571428571427, |
| "loss": 3.5618, |
| "step": 600 |
| }, |
| { |
| "epoch": 8.335664335664335, |
| "eval_cer": 0.9713011440760132, |
| "eval_loss": 3.5168509483337402, |
| "eval_runtime": 6.166, |
| "eval_samples_per_second": 46.221, |
| "eval_steps_per_second": 5.838, |
| "step": 600 |
| }, |
| { |
| "epoch": 9.727272727272727, |
| "grad_norm": 1.4139310121536255, |
| "learning_rate": 0.0002745, |
| "loss": 3.4994, |
| "step": 700 |
| }, |
| { |
| "epoch": 9.727272727272727, |
| "eval_cer": 0.9699437657552841, |
| "eval_loss": 3.3551578521728516, |
| "eval_runtime": 6.1552, |
| "eval_samples_per_second": 46.302, |
| "eval_steps_per_second": 5.849, |
| "step": 700 |
| }, |
| { |
| "epoch": 11.111888111888112, |
| "grad_norm": 0.8385369777679443, |
| "learning_rate": 0.0002702142857142857, |
| "loss": 3.3323, |
| "step": 800 |
| }, |
| { |
| "epoch": 11.111888111888112, |
| "eval_cer": 0.954043048283886, |
| "eval_loss": 3.138484001159668, |
| "eval_runtime": 6.0631, |
| "eval_samples_per_second": 47.005, |
| "eval_steps_per_second": 5.938, |
| "step": 800 |
| }, |
| { |
| "epoch": 12.503496503496503, |
| "grad_norm": 3.5932364463806152, |
| "learning_rate": 0.0002659285714285714, |
| "loss": 3.163, |
| "step": 900 |
| }, |
| { |
| "epoch": 12.503496503496503, |
| "eval_cer": 0.9185573007562536, |
| "eval_loss": 2.9224491119384766, |
| "eval_runtime": 6.0879, |
| "eval_samples_per_second": 46.814, |
| "eval_steps_per_second": 5.913, |
| "step": 900 |
| }, |
| { |
| "epoch": 13.895104895104895, |
| "grad_norm": 1.8399698734283447, |
| "learning_rate": 0.0002616428571428571, |
| "loss": 2.7901, |
| "step": 1000 |
| }, |
| { |
| "epoch": 13.895104895104895, |
| "eval_cer": 0.7828194686833431, |
| "eval_loss": 2.180238723754883, |
| "eval_runtime": 6.0259, |
| "eval_samples_per_second": 47.296, |
| "eval_steps_per_second": 5.974, |
| "step": 1000 |
| }, |
| { |
| "epoch": 15.27972027972028, |
| "grad_norm": 1.625369668006897, |
| "learning_rate": 0.00025735714285714283, |
| "loss": 2.3425, |
| "step": 1100 |
| }, |
| { |
| "epoch": 15.27972027972028, |
| "eval_cer": 0.6528989722707, |
| "eval_loss": 1.8405641317367554, |
| "eval_runtime": 6.0991, |
| "eval_samples_per_second": 46.728, |
| "eval_steps_per_second": 5.903, |
| "step": 1100 |
| }, |
| { |
| "epoch": 16.67132867132867, |
| "grad_norm": 2.625293016433716, |
| "learning_rate": 0.0002530714285714285, |
| "loss": 2.0608, |
| "step": 1200 |
| }, |
| { |
| "epoch": 16.67132867132867, |
| "eval_cer": 0.6329261198371146, |
| "eval_loss": 1.6505399942398071, |
| "eval_runtime": 6.1263, |
| "eval_samples_per_second": 46.52, |
| "eval_steps_per_second": 5.876, |
| "step": 1200 |
| }, |
| { |
| "epoch": 18.055944055944057, |
| "grad_norm": 1.92220139503479, |
| "learning_rate": 0.00024878571428571425, |
| "loss": 1.8813, |
| "step": 1300 |
| }, |
| { |
| "epoch": 18.055944055944057, |
| "eval_cer": 0.5714562730269537, |
| "eval_loss": 1.4768792390823364, |
| "eval_runtime": 6.1089, |
| "eval_samples_per_second": 46.653, |
| "eval_steps_per_second": 5.893, |
| "step": 1300 |
| }, |
| { |
| "epoch": 19.447552447552447, |
| "grad_norm": 3.1366982460021973, |
| "learning_rate": 0.0002445, |
| "loss": 1.6705, |
| "step": 1400 |
| }, |
| { |
| "epoch": 19.447552447552447, |
| "eval_cer": 0.5580764010083382, |
| "eval_loss": 1.479285478591919, |
| "eval_runtime": 6.1149, |
| "eval_samples_per_second": 46.608, |
| "eval_steps_per_second": 5.887, |
| "step": 1400 |
| }, |
| { |
| "epoch": 20.83916083916084, |
| "grad_norm": 2.116931200027466, |
| "learning_rate": 0.0002402142857142857, |
| "loss": 1.558, |
| "step": 1500 |
| }, |
| { |
| "epoch": 20.83916083916084, |
| "eval_cer": 0.4969943765755284, |
| "eval_loss": 1.3079291582107544, |
| "eval_runtime": 6.0774, |
| "eval_samples_per_second": 46.895, |
| "eval_steps_per_second": 5.924, |
| "step": 1500 |
| }, |
| { |
| "epoch": 22.223776223776223, |
| "grad_norm": 4.369190692901611, |
| "learning_rate": 0.00023592857142857142, |
| "loss": 1.4213, |
| "step": 1600 |
| }, |
| { |
| "epoch": 22.223776223776223, |
| "eval_cer": 0.49466744231142135, |
| "eval_loss": 1.3551599979400635, |
| "eval_runtime": 6.1085, |
| "eval_samples_per_second": 46.657, |
| "eval_steps_per_second": 5.893, |
| "step": 1600 |
| }, |
| { |
| "epoch": 23.615384615384617, |
| "grad_norm": 2.938127279281616, |
| "learning_rate": 0.00023164285714285713, |
| "loss": 1.3122, |
| "step": 1700 |
| }, |
| { |
| "epoch": 23.615384615384617, |
| "eval_cer": 0.4355245297653675, |
| "eval_loss": 1.236782431602478, |
| "eval_runtime": 6.1138, |
| "eval_samples_per_second": 46.616, |
| "eval_steps_per_second": 5.888, |
| "step": 1700 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 4.156201362609863, |
| "learning_rate": 0.00022735714285714286, |
| "loss": 1.2303, |
| "step": 1800 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_cer": 0.4347488850106651, |
| "eval_loss": 1.210758924484253, |
| "eval_runtime": 6.0871, |
| "eval_samples_per_second": 46.82, |
| "eval_steps_per_second": 5.914, |
| "step": 1800 |
| }, |
| { |
| "epoch": 26.39160839160839, |
| "grad_norm": 3.738050937652588, |
| "learning_rate": 0.00022307142857142854, |
| "loss": 1.1152, |
| "step": 1900 |
| }, |
| { |
| "epoch": 26.39160839160839, |
| "eval_cer": 0.4306767500484778, |
| "eval_loss": 1.2177391052246094, |
| "eval_runtime": 6.1061, |
| "eval_samples_per_second": 46.674, |
| "eval_steps_per_second": 5.896, |
| "step": 1900 |
| }, |
| { |
| "epoch": 27.783216783216783, |
| "grad_norm": 3.721745014190674, |
| "learning_rate": 0.00021878571428571428, |
| "loss": 1.0441, |
| "step": 2000 |
| }, |
| { |
| "epoch": 27.783216783216783, |
| "eval_cer": 0.4291254605390731, |
| "eval_loss": 1.3235960006713867, |
| "eval_runtime": 6.0708, |
| "eval_samples_per_second": 46.946, |
| "eval_steps_per_second": 5.93, |
| "step": 2000 |
| }, |
| { |
| "epoch": 29.167832167832167, |
| "grad_norm": 2.746555805206299, |
| "learning_rate": 0.00021449999999999998, |
| "loss": 0.9626, |
| "step": 2100 |
| }, |
| { |
| "epoch": 29.167832167832167, |
| "eval_cer": 0.4157455885204576, |
| "eval_loss": 1.2737609148025513, |
| "eval_runtime": 6.1093, |
| "eval_samples_per_second": 46.65, |
| "eval_steps_per_second": 5.893, |
| "step": 2100 |
| }, |
| { |
| "epoch": 30.55944055944056, |
| "grad_norm": 2.8237345218658447, |
| "learning_rate": 0.0002102142857142857, |
| "loss": 0.8987, |
| "step": 2200 |
| }, |
| { |
| "epoch": 30.55944055944056, |
| "eval_cer": 0.4190420787279426, |
| "eval_loss": 1.2683167457580566, |
| "eval_runtime": 6.1368, |
| "eval_samples_per_second": 46.441, |
| "eval_steps_per_second": 5.866, |
| "step": 2200 |
| }, |
| { |
| "epoch": 31.95104895104895, |
| "grad_norm": 5.561631679534912, |
| "learning_rate": 0.0002059285714285714, |
| "loss": 0.8367, |
| "step": 2300 |
| }, |
| { |
| "epoch": 31.95104895104895, |
| "eval_cer": 0.41438821019972855, |
| "eval_loss": 1.2570189237594604, |
| "eval_runtime": 6.095, |
| "eval_samples_per_second": 46.76, |
| "eval_steps_per_second": 5.906, |
| "step": 2300 |
| }, |
| { |
| "epoch": 33.33566433566433, |
| "grad_norm": 1.7682024240493774, |
| "learning_rate": 0.00020164285714285713, |
| "loss": 0.7617, |
| "step": 2400 |
| }, |
| { |
| "epoch": 33.33566433566433, |
| "eval_cer": 0.3876284661624976, |
| "eval_loss": 1.233074426651001, |
| "eval_runtime": 6.0871, |
| "eval_samples_per_second": 46.821, |
| "eval_steps_per_second": 5.914, |
| "step": 2400 |
| }, |
| { |
| "epoch": 34.72727272727273, |
| "grad_norm": 3.293351888656616, |
| "learning_rate": 0.00019735714285714284, |
| "loss": 0.7069, |
| "step": 2500 |
| }, |
| { |
| "epoch": 34.72727272727273, |
| "eval_cer": 0.40372309482257124, |
| "eval_loss": 1.328414797782898, |
| "eval_runtime": 6.0987, |
| "eval_samples_per_second": 46.731, |
| "eval_steps_per_second": 5.903, |
| "step": 2500 |
| }, |
| { |
| "epoch": 36.11188811188811, |
| "grad_norm": 1.8948358297348022, |
| "learning_rate": 0.00019307142857142854, |
| "loss": 0.6874, |
| "step": 2600 |
| }, |
| { |
| "epoch": 36.11188811188811, |
| "eval_cer": 0.38181113050223, |
| "eval_loss": 1.2947708368301392, |
| "eval_runtime": 6.0589, |
| "eval_samples_per_second": 47.038, |
| "eval_steps_per_second": 5.942, |
| "step": 2600 |
| }, |
| { |
| "epoch": 37.50349650349651, |
| "grad_norm": 2.3135173320770264, |
| "learning_rate": 0.00018878571428571428, |
| "loss": 0.6615, |
| "step": 2700 |
| }, |
| { |
| "epoch": 37.50349650349651, |
| "eval_cer": 0.3977118479736281, |
| "eval_loss": 1.299822211265564, |
| "eval_runtime": 6.0553, |
| "eval_samples_per_second": 47.066, |
| "eval_steps_per_second": 5.945, |
| "step": 2700 |
| }, |
| { |
| "epoch": 38.89510489510489, |
| "grad_norm": 8.07669448852539, |
| "learning_rate": 0.00018449999999999999, |
| "loss": 0.6086, |
| "step": 2800 |
| }, |
| { |
| "epoch": 38.89510489510489, |
| "eval_cer": 0.3757998836532868, |
| "eval_loss": 1.3369208574295044, |
| "eval_runtime": 6.0593, |
| "eval_samples_per_second": 47.035, |
| "eval_steps_per_second": 5.941, |
| "step": 2800 |
| }, |
| { |
| "epoch": 40.27972027972028, |
| "grad_norm": 2.3282470703125, |
| "learning_rate": 0.00018021428571428572, |
| "loss": 0.5804, |
| "step": 2900 |
| }, |
| { |
| "epoch": 40.27972027972028, |
| "eval_cer": 0.38375024238898586, |
| "eval_loss": 1.2814927101135254, |
| "eval_runtime": 6.0834, |
| "eval_samples_per_second": 46.849, |
| "eval_steps_per_second": 5.918, |
| "step": 2900 |
| }, |
| { |
| "epoch": 41.67132867132867, |
| "grad_norm": 5.158154010772705, |
| "learning_rate": 0.0001759285714285714, |
| "loss": 0.548, |
| "step": 3000 |
| }, |
| { |
| "epoch": 41.67132867132867, |
| "eval_cer": 0.37657552840798914, |
| "eval_loss": 1.3390411138534546, |
| "eval_runtime": 6.0871, |
| "eval_samples_per_second": 46.82, |
| "eval_steps_per_second": 5.914, |
| "step": 3000 |
| }, |
| { |
| "epoch": 43.05594405594405, |
| "grad_norm": 1.2800214290618896, |
| "learning_rate": 0.00017164285714285713, |
| "loss": 0.5239, |
| "step": 3100 |
| }, |
| { |
| "epoch": 43.05594405594405, |
| "eval_cer": 0.367267791351561, |
| "eval_loss": 1.257192850112915, |
| "eval_runtime": 6.0964, |
| "eval_samples_per_second": 46.749, |
| "eval_steps_per_second": 5.905, |
| "step": 3100 |
| }, |
| { |
| "epoch": 44.44755244755245, |
| "grad_norm": 4.9716010093688965, |
| "learning_rate": 0.00016735714285714284, |
| "loss": 0.4983, |
| "step": 3200 |
| }, |
| { |
| "epoch": 44.44755244755245, |
| "eval_cer": 0.3670738801628854, |
| "eval_loss": 1.295488715171814, |
| "eval_runtime": 6.034, |
| "eval_samples_per_second": 47.232, |
| "eval_steps_per_second": 5.966, |
| "step": 3200 |
| }, |
| { |
| "epoch": 45.83916083916084, |
| "grad_norm": 2.536774158477783, |
| "learning_rate": 0.0001631142857142857, |
| "loss": 0.4793, |
| "step": 3300 |
| }, |
| { |
| "epoch": 45.83916083916084, |
| "eval_cer": 0.372891215823153, |
| "eval_loss": 1.3562514781951904, |
| "eval_runtime": 6.0912, |
| "eval_samples_per_second": 46.789, |
| "eval_steps_per_second": 5.91, |
| "step": 3300 |
| }, |
| { |
| "epoch": 47.22377622377623, |
| "grad_norm": 3.6583845615386963, |
| "learning_rate": 0.00015882857142857142, |
| "loss": 0.438, |
| "step": 3400 |
| }, |
| { |
| "epoch": 47.22377622377623, |
| "eval_cer": 0.3915066899360093, |
| "eval_loss": 1.415280818939209, |
| "eval_runtime": 6.0618, |
| "eval_samples_per_second": 47.016, |
| "eval_steps_per_second": 5.939, |
| "step": 3400 |
| }, |
| { |
| "epoch": 48.61538461538461, |
| "grad_norm": 2.472052574157715, |
| "learning_rate": 0.00015454285714285712, |
| "loss": 0.4274, |
| "step": 3500 |
| }, |
| { |
| "epoch": 48.61538461538461, |
| "eval_cer": 0.36629823540818307, |
| "eval_loss": 1.319765567779541, |
| "eval_runtime": 6.1106, |
| "eval_samples_per_second": 46.64, |
| "eval_steps_per_second": 5.891, |
| "step": 3500 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 7.336581707000732, |
| "learning_rate": 0.00015025714285714286, |
| "loss": 0.4064, |
| "step": 3600 |
| }, |
| { |
| "epoch": 50.0, |
| "eval_cer": 0.3814233081248788, |
| "eval_loss": 1.4350632429122925, |
| "eval_runtime": 6.098, |
| "eval_samples_per_second": 46.736, |
| "eval_steps_per_second": 5.904, |
| "step": 3600 |
| }, |
| { |
| "epoch": 51.39160839160839, |
| "grad_norm": 2.7026124000549316, |
| "learning_rate": 0.00014597142857142856, |
| "loss": 0.3812, |
| "step": 3700 |
| }, |
| { |
| "epoch": 51.39160839160839, |
| "eval_cer": 0.36203218925732017, |
| "eval_loss": 1.351439356803894, |
| "eval_runtime": 6.0572, |
| "eval_samples_per_second": 47.051, |
| "eval_steps_per_second": 5.943, |
| "step": 3700 |
| }, |
| { |
| "epoch": 52.78321678321678, |
| "grad_norm": 1.425048589706421, |
| "learning_rate": 0.00014168571428571427, |
| "loss": 0.3753, |
| "step": 3800 |
| }, |
| { |
| "epoch": 52.78321678321678, |
| "eval_cer": 0.3492340508047314, |
| "eval_loss": 1.3715204000473022, |
| "eval_runtime": 6.0504, |
| "eval_samples_per_second": 47.104, |
| "eval_steps_per_second": 5.95, |
| "step": 3800 |
| }, |
| { |
| "epoch": 54.16783216783217, |
| "grad_norm": 2.945066452026367, |
| "learning_rate": 0.0001374, |
| "loss": 0.3549, |
| "step": 3900 |
| }, |
| { |
| "epoch": 54.16783216783217, |
| "eval_cer": 0.36494085708745394, |
| "eval_loss": 1.4132966995239258, |
| "eval_runtime": 6.0971, |
| "eval_samples_per_second": 46.743, |
| "eval_steps_per_second": 5.904, |
| "step": 3900 |
| }, |
| { |
| "epoch": 55.55944055944056, |
| "grad_norm": 1.2087554931640625, |
| "learning_rate": 0.0001331142857142857, |
| "loss": 0.3262, |
| "step": 4000 |
| }, |
| { |
| "epoch": 55.55944055944056, |
| "eval_cer": 0.3573783207291061, |
| "eval_loss": 1.4259963035583496, |
| "eval_runtime": 6.0615, |
| "eval_samples_per_second": 47.018, |
| "eval_steps_per_second": 5.939, |
| "step": 4000 |
| }, |
| { |
| "epoch": 56.95104895104895, |
| "grad_norm": 6.813267230987549, |
| "learning_rate": 0.00012882857142857142, |
| "loss": 0.3296, |
| "step": 4100 |
| }, |
| { |
| "epoch": 56.95104895104895, |
| "eval_cer": 0.35524529765367463, |
| "eval_loss": 1.5134129524230957, |
| "eval_runtime": 6.0512, |
| "eval_samples_per_second": 47.098, |
| "eval_steps_per_second": 5.949, |
| "step": 4100 |
| }, |
| { |
| "epoch": 58.33566433566433, |
| "grad_norm": 1.582381248474121, |
| "learning_rate": 0.00012454285714285713, |
| "loss": 0.3136, |
| "step": 4200 |
| }, |
| { |
| "epoch": 58.33566433566433, |
| "eval_cer": 0.35873569904983515, |
| "eval_loss": 1.4695625305175781, |
| "eval_runtime": 6.0851, |
| "eval_samples_per_second": 46.836, |
| "eval_steps_per_second": 5.916, |
| "step": 4200 |
| }, |
| { |
| "epoch": 59.72727272727273, |
| "grad_norm": 0.9694690108299255, |
| "learning_rate": 0.00012025714285714285, |
| "loss": 0.3009, |
| "step": 4300 |
| }, |
| { |
| "epoch": 59.72727272727273, |
| "eval_cer": 0.3554392088423502, |
| "eval_loss": 1.432596206665039, |
| "eval_runtime": 6.0669, |
| "eval_samples_per_second": 46.976, |
| "eval_steps_per_second": 5.934, |
| "step": 4300 |
| }, |
| { |
| "epoch": 61.11188811188811, |
| "grad_norm": 1.6826478242874146, |
| "learning_rate": 0.00011597142857142855, |
| "loss": 0.2764, |
| "step": 4400 |
| }, |
| { |
| "epoch": 61.11188811188811, |
| "eval_cer": 0.35718440954043046, |
| "eval_loss": 1.4485613107681274, |
| "eval_runtime": 6.0638, |
| "eval_samples_per_second": 47.0, |
| "eval_steps_per_second": 5.937, |
| "step": 4400 |
| }, |
| { |
| "epoch": 62.50349650349651, |
| "grad_norm": 1.2600972652435303, |
| "learning_rate": 0.00011168571428571427, |
| "loss": 0.2738, |
| "step": 4500 |
| }, |
| { |
| "epoch": 62.50349650349651, |
| "eval_cer": 0.3593174326158619, |
| "eval_loss": 1.446284294128418, |
| "eval_runtime": 6.0798, |
| "eval_samples_per_second": 46.876, |
| "eval_steps_per_second": 5.921, |
| "step": 4500 |
| }, |
| { |
| "epoch": 63.89510489510489, |
| "grad_norm": 2.8400301933288574, |
| "learning_rate": 0.00010739999999999998, |
| "loss": 0.2574, |
| "step": 4600 |
| }, |
| { |
| "epoch": 63.89510489510489, |
| "eval_cer": 0.358347876672484, |
| "eval_loss": 1.4303468465805054, |
| "eval_runtime": 6.1117, |
| "eval_samples_per_second": 46.632, |
| "eval_steps_per_second": 5.89, |
| "step": 4600 |
| }, |
| { |
| "epoch": 65.27972027972028, |
| "grad_norm": 1.22808039188385, |
| "learning_rate": 0.0001031142857142857, |
| "loss": 0.2397, |
| "step": 4700 |
| }, |
| { |
| "epoch": 65.27972027972028, |
| "eval_cer": 0.3445801822765174, |
| "eval_loss": 1.4538311958312988, |
| "eval_runtime": 6.0831, |
| "eval_samples_per_second": 46.851, |
| "eval_steps_per_second": 5.918, |
| "step": 4700 |
| }, |
| { |
| "epoch": 66.67132867132867, |
| "grad_norm": 1.9458190202713013, |
| "learning_rate": 9.882857142857141e-05, |
| "loss": 0.2474, |
| "step": 4800 |
| }, |
| { |
| "epoch": 66.67132867132867, |
| "eval_cer": 0.3496218731820826, |
| "eval_loss": 1.4416499137878418, |
| "eval_runtime": 6.1066, |
| "eval_samples_per_second": 46.671, |
| "eval_steps_per_second": 5.895, |
| "step": 4800 |
| }, |
| { |
| "epoch": 68.05594405594405, |
| "grad_norm": 1.5397316217422485, |
| "learning_rate": 9.454285714285714e-05, |
| "loss": 0.2212, |
| "step": 4900 |
| }, |
| { |
| "epoch": 68.05594405594405, |
| "eval_cer": 0.34477409346519294, |
| "eval_loss": 1.476562738418579, |
| "eval_runtime": 6.125, |
| "eval_samples_per_second": 46.53, |
| "eval_steps_per_second": 5.878, |
| "step": 4900 |
| }, |
| { |
| "epoch": 69.44755244755245, |
| "grad_norm": 4.152817249298096, |
| "learning_rate": 9.025714285714285e-05, |
| "loss": 0.2173, |
| "step": 5000 |
| }, |
| { |
| "epoch": 69.44755244755245, |
| "eval_cer": 0.3496218731820826, |
| "eval_loss": 1.4784653186798096, |
| "eval_runtime": 6.0989, |
| "eval_samples_per_second": 46.73, |
| "eval_steps_per_second": 5.903, |
| "step": 5000 |
| }, |
| { |
| "epoch": 70.83916083916084, |
| "grad_norm": 1.8647962808609009, |
| "learning_rate": 8.597142857142857e-05, |
| "loss": 0.2138, |
| "step": 5100 |
| }, |
| { |
| "epoch": 70.83916083916084, |
| "eval_cer": 0.3581539654838084, |
| "eval_loss": 1.4859139919281006, |
| "eval_runtime": 6.0911, |
| "eval_samples_per_second": 46.79, |
| "eval_steps_per_second": 5.91, |
| "step": 5100 |
| }, |
| { |
| "epoch": 72.22377622377623, |
| "grad_norm": 6.688198089599609, |
| "learning_rate": 8.168571428571428e-05, |
| "loss": 0.2037, |
| "step": 5200 |
| }, |
| { |
| "epoch": 72.22377622377623, |
| "eval_cer": 0.3500096955594338, |
| "eval_loss": 1.5022231340408325, |
| "eval_runtime": 6.1063, |
| "eval_samples_per_second": 46.673, |
| "eval_steps_per_second": 5.896, |
| "step": 5200 |
| }, |
| { |
| "epoch": 73.61538461538461, |
| "grad_norm": 2.6784660816192627, |
| "learning_rate": 7.74e-05, |
| "loss": 0.194, |
| "step": 5300 |
| }, |
| { |
| "epoch": 73.61538461538461, |
| "eval_cer": 0.34904013961605584, |
| "eval_loss": 1.4964337348937988, |
| "eval_runtime": 6.0868, |
| "eval_samples_per_second": 46.823, |
| "eval_steps_per_second": 5.914, |
| "step": 5300 |
| }, |
| { |
| "epoch": 75.0, |
| "grad_norm": 2.1717565059661865, |
| "learning_rate": 7.31142857142857e-05, |
| "loss": 0.1758, |
| "step": 5400 |
| }, |
| { |
| "epoch": 75.0, |
| "eval_cer": 0.35524529765367463, |
| "eval_loss": 1.5645275115966797, |
| "eval_runtime": 6.0591, |
| "eval_samples_per_second": 47.037, |
| "eval_steps_per_second": 5.941, |
| "step": 5400 |
| }, |
| { |
| "epoch": 76.39160839160839, |
| "grad_norm": 3.3418147563934326, |
| "learning_rate": 6.882857142857142e-05, |
| "loss": 0.1693, |
| "step": 5500 |
| }, |
| { |
| "epoch": 76.39160839160839, |
| "eval_cer": 0.3492340508047314, |
| "eval_loss": 1.5214943885803223, |
| "eval_runtime": 6.0965, |
| "eval_samples_per_second": 46.748, |
| "eval_steps_per_second": 5.905, |
| "step": 5500 |
| }, |
| { |
| "epoch": 77.78321678321679, |
| "grad_norm": 1.1721317768096924, |
| "learning_rate": 6.454285714285713e-05, |
| "loss": 0.1682, |
| "step": 5600 |
| }, |
| { |
| "epoch": 77.78321678321679, |
| "eval_cer": 0.34361062633313944, |
| "eval_loss": 1.557164192199707, |
| "eval_runtime": 6.0786, |
| "eval_samples_per_second": 46.886, |
| "eval_steps_per_second": 5.922, |
| "step": 5600 |
| }, |
| { |
| "epoch": 79.16783216783217, |
| "grad_norm": 1.2348577976226807, |
| "learning_rate": 6.0257142857142846e-05, |
| "loss": 0.1616, |
| "step": 5700 |
| }, |
| { |
| "epoch": 79.16783216783217, |
| "eval_cer": 0.3461314717859221, |
| "eval_loss": 1.497072696685791, |
| "eval_runtime": 6.0713, |
| "eval_samples_per_second": 46.942, |
| "eval_steps_per_second": 5.93, |
| "step": 5700 |
| }, |
| { |
| "epoch": 80.55944055944056, |
| "grad_norm": 1.101247787475586, |
| "learning_rate": 5.597142857142857e-05, |
| "loss": 0.1625, |
| "step": 5800 |
| }, |
| { |
| "epoch": 80.55944055944056, |
| "eval_cer": 0.3515609850688385, |
| "eval_loss": 1.5326788425445557, |
| "eval_runtime": 6.0886, |
| "eval_samples_per_second": 46.809, |
| "eval_steps_per_second": 5.913, |
| "step": 5800 |
| }, |
| { |
| "epoch": 81.95104895104895, |
| "grad_norm": 4.953105449676514, |
| "learning_rate": 5.168571428571428e-05, |
| "loss": 0.1432, |
| "step": 5900 |
| }, |
| { |
| "epoch": 81.95104895104895, |
| "eval_cer": 0.35059142912546054, |
| "eval_loss": 1.5595433712005615, |
| "eval_runtime": 6.078, |
| "eval_samples_per_second": 46.89, |
| "eval_steps_per_second": 5.923, |
| "step": 5900 |
| }, |
| { |
| "epoch": 83.33566433566433, |
| "grad_norm": 1.2514948844909668, |
| "learning_rate": 4.7399999999999993e-05, |
| "loss": 0.1348, |
| "step": 6000 |
| }, |
| { |
| "epoch": 83.33566433566433, |
| "eval_cer": 0.3482644948613535, |
| "eval_loss": 1.556195616722107, |
| "eval_runtime": 6.0756, |
| "eval_samples_per_second": 46.909, |
| "eval_steps_per_second": 5.925, |
| "step": 6000 |
| }, |
| { |
| "epoch": 84.72727272727273, |
| "grad_norm": 0.5957698225975037, |
| "learning_rate": 4.311428571428571e-05, |
| "loss": 0.137, |
| "step": 6100 |
| }, |
| { |
| "epoch": 84.72727272727273, |
| "eval_cer": 0.3484584060500291, |
| "eval_loss": 1.590181827545166, |
| "eval_runtime": 6.0761, |
| "eval_samples_per_second": 46.905, |
| "eval_steps_per_second": 5.925, |
| "step": 6100 |
| }, |
| { |
| "epoch": 86.1118881118881, |
| "grad_norm": 2.8553950786590576, |
| "learning_rate": 3.882857142857142e-05, |
| "loss": 0.1263, |
| "step": 6200 |
| }, |
| { |
| "epoch": 86.1118881118881, |
| "eval_cer": 0.35214271863486524, |
| "eval_loss": 1.5852645635604858, |
| "eval_runtime": 6.1244, |
| "eval_samples_per_second": 46.535, |
| "eval_steps_per_second": 5.878, |
| "step": 6200 |
| }, |
| { |
| "epoch": 87.5034965034965, |
| "grad_norm": 0.7932090759277344, |
| "learning_rate": 3.454285714285714e-05, |
| "loss": 0.1271, |
| "step": 6300 |
| }, |
| { |
| "epoch": 87.5034965034965, |
| "eval_cer": 0.3488462284273803, |
| "eval_loss": 1.597654938697815, |
| "eval_runtime": 6.1436, |
| "eval_samples_per_second": 46.389, |
| "eval_steps_per_second": 5.86, |
| "step": 6300 |
| }, |
| { |
| "epoch": 88.8951048951049, |
| "grad_norm": 2.1244022846221924, |
| "learning_rate": 3.0257142857142855e-05, |
| "loss": 0.123, |
| "step": 6400 |
| }, |
| { |
| "epoch": 88.8951048951049, |
| "eval_cer": 0.3498157843707582, |
| "eval_loss": 1.6023805141448975, |
| "eval_runtime": 6.0927, |
| "eval_samples_per_second": 46.777, |
| "eval_steps_per_second": 5.909, |
| "step": 6400 |
| }, |
| { |
| "epoch": 90.27972027972028, |
| "grad_norm": 1.3532764911651611, |
| "learning_rate": 2.5971428571428572e-05, |
| "loss": 0.117, |
| "step": 6500 |
| }, |
| { |
| "epoch": 90.27972027972028, |
| "eval_cer": 0.3535000969555943, |
| "eval_loss": 1.6092747449874878, |
| "eval_runtime": 6.1301, |
| "eval_samples_per_second": 46.492, |
| "eval_steps_per_second": 5.873, |
| "step": 6500 |
| }, |
| { |
| "epoch": 91.67132867132867, |
| "grad_norm": 3.1814770698547363, |
| "learning_rate": 2.1685714285714286e-05, |
| "loss": 0.1077, |
| "step": 6600 |
| }, |
| { |
| "epoch": 91.67132867132867, |
| "eval_cer": 0.35194880744618967, |
| "eval_loss": 1.5807358026504517, |
| "eval_runtime": 6.0891, |
| "eval_samples_per_second": 46.805, |
| "eval_steps_per_second": 5.912, |
| "step": 6600 |
| }, |
| { |
| "epoch": 93.05594405594405, |
| "grad_norm": 1.0375070571899414, |
| "learning_rate": 1.74e-05, |
| "loss": 0.1072, |
| "step": 6700 |
| }, |
| { |
| "epoch": 93.05594405594405, |
| "eval_cer": 0.3476827612953267, |
| "eval_loss": 1.580068588256836, |
| "eval_runtime": 6.1035, |
| "eval_samples_per_second": 46.695, |
| "eval_steps_per_second": 5.898, |
| "step": 6700 |
| }, |
| { |
| "epoch": 94.44755244755245, |
| "grad_norm": 1.3363580703735352, |
| "learning_rate": 1.3114285714285713e-05, |
| "loss": 0.1063, |
| "step": 6800 |
| }, |
| { |
| "epoch": 94.44755244755245, |
| "eval_cer": 0.35020360674810935, |
| "eval_loss": 1.5893759727478027, |
| "eval_runtime": 6.0883, |
| "eval_samples_per_second": 46.811, |
| "eval_steps_per_second": 5.913, |
| "step": 6800 |
| }, |
| { |
| "epoch": 95.83916083916084, |
| "grad_norm": 2.259174108505249, |
| "learning_rate": 8.828571428571429e-06, |
| "loss": 0.103, |
| "step": 6900 |
| }, |
| { |
| "epoch": 95.83916083916084, |
| "eval_cer": 0.3498157843707582, |
| "eval_loss": 1.602723240852356, |
| "eval_runtime": 6.0922, |
| "eval_samples_per_second": 46.781, |
| "eval_steps_per_second": 5.909, |
| "step": 6900 |
| }, |
| { |
| "epoch": 97.22377622377623, |
| "grad_norm": 1.8573235273361206, |
| "learning_rate": 4.585714285714285e-06, |
| "loss": 0.1032, |
| "step": 7000 |
| }, |
| { |
| "epoch": 97.22377622377623, |
| "eval_cer": 0.3484584060500291, |
| "eval_loss": 1.6034408807754517, |
| "eval_runtime": 6.1003, |
| "eval_samples_per_second": 46.719, |
| "eval_steps_per_second": 5.901, |
| "step": 7000 |
| }, |
| { |
| "epoch": 98.61538461538461, |
| "grad_norm": 1.2302757501602173, |
| "learning_rate": 3e-07, |
| "loss": 0.0971, |
| "step": 7100 |
| }, |
| { |
| "epoch": 98.61538461538461, |
| "eval_cer": 0.3480705836726779, |
| "eval_loss": 1.6103968620300293, |
| "eval_runtime": 6.1158, |
| "eval_samples_per_second": 46.6, |
| "eval_steps_per_second": 5.886, |
| "step": 7100 |
| }, |
| { |
| "epoch": 98.61538461538461, |
| "step": 7100, |
| "total_flos": 1.5637471182402683e+19, |
| "train_loss": 1.063481583259475, |
| "train_runtime": 9766.9628, |
| "train_samples_per_second": 23.354, |
| "train_steps_per_second": 0.727 |
| }, |
| { |
| "epoch": 98.61538461538461, |
| "eval_cer": 0.35214271863486524, |
| "eval_loss": 1.6103789806365967, |
| "eval_runtime": 6.0538, |
| "eval_samples_per_second": 47.078, |
| "eval_steps_per_second": 5.947, |
| "step": 7100 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 7100, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 100, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.5637471182402683e+19, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|