| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 7.407709414381023, | |
| "eval_steps": 1000, | |
| "global_step": 5000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.037064492216456635, | |
| "grad_norm": 14.615763664245605, | |
| "learning_rate": 4.800000000000001e-07, | |
| "loss": 1.4607, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.07412898443291327, | |
| "grad_norm": 10.59756851196289, | |
| "learning_rate": 9.800000000000001e-07, | |
| "loss": 1.176, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.1111934766493699, | |
| "grad_norm": 7.142136573791504, | |
| "learning_rate": 1.48e-06, | |
| "loss": 0.9267, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.14825796886582654, | |
| "grad_norm": 9.191902160644531, | |
| "learning_rate": 1.98e-06, | |
| "loss": 0.7253, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.18532246108228317, | |
| "grad_norm": 10.320201873779297, | |
| "learning_rate": 2.4800000000000004e-06, | |
| "loss": 0.7047, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.2223869532987398, | |
| "grad_norm": 8.486912727355957, | |
| "learning_rate": 2.9800000000000003e-06, | |
| "loss": 0.6634, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.25945144551519644, | |
| "grad_norm": 9.802300453186035, | |
| "learning_rate": 3.48e-06, | |
| "loss": 0.5786, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.2965159377316531, | |
| "grad_norm": 9.568249702453613, | |
| "learning_rate": 3.980000000000001e-06, | |
| "loss": 0.5857, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3335804299481097, | |
| "grad_norm": 7.968526840209961, | |
| "learning_rate": 4.48e-06, | |
| "loss": 0.5385, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.37064492216456635, | |
| "grad_norm": 7.507795810699463, | |
| "learning_rate": 4.980000000000001e-06, | |
| "loss": 0.5151, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.407709414381023, | |
| "grad_norm": 6.258375644683838, | |
| "learning_rate": 5.480000000000001e-06, | |
| "loss": 0.4649, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.4447739065974796, | |
| "grad_norm": 9.89697551727295, | |
| "learning_rate": 5.98e-06, | |
| "loss": 0.4209, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.48183839881393625, | |
| "grad_norm": 7.8507490158081055, | |
| "learning_rate": 6.480000000000001e-06, | |
| "loss": 0.4459, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.5189028910303929, | |
| "grad_norm": 5.835811138153076, | |
| "learning_rate": 6.98e-06, | |
| "loss": 0.4141, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5559673832468495, | |
| "grad_norm": 6.767547607421875, | |
| "learning_rate": 7.48e-06, | |
| "loss": 0.4108, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.5930318754633062, | |
| "grad_norm": 5.9475884437561035, | |
| "learning_rate": 7.980000000000002e-06, | |
| "loss": 0.41, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.6300963676797627, | |
| "grad_norm": 7.767906188964844, | |
| "learning_rate": 8.48e-06, | |
| "loss": 0.3781, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.6671608598962194, | |
| "grad_norm": 6.990137100219727, | |
| "learning_rate": 8.98e-06, | |
| "loss": 0.39, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.704225352112676, | |
| "grad_norm": 5.607441425323486, | |
| "learning_rate": 9.48e-06, | |
| "loss": 0.3783, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.7412898443291327, | |
| "grad_norm": 6.288857936859131, | |
| "learning_rate": 9.980000000000001e-06, | |
| "loss": 0.3559, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.7783543365455893, | |
| "grad_norm": 6.985698699951172, | |
| "learning_rate": 9.946666666666667e-06, | |
| "loss": 0.3595, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.815418828762046, | |
| "grad_norm": 6.037854194641113, | |
| "learning_rate": 9.891111111111113e-06, | |
| "loss": 0.3163, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.8524833209785025, | |
| "grad_norm": 5.8710784912109375, | |
| "learning_rate": 9.835555555555556e-06, | |
| "loss": 0.3502, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.8895478131949592, | |
| "grad_norm": 6.342834949493408, | |
| "learning_rate": 9.780000000000001e-06, | |
| "loss": 0.317, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.9266123054114158, | |
| "grad_norm": 5.589534759521484, | |
| "learning_rate": 9.724444444444445e-06, | |
| "loss": 0.3228, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.9636767976278725, | |
| "grad_norm": 7.743918418884277, | |
| "learning_rate": 9.66888888888889e-06, | |
| "loss": 0.3144, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 10.073568344116211, | |
| "learning_rate": 9.613333333333335e-06, | |
| "loss": 0.2939, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.0370644922164567, | |
| "grad_norm": 4.640520095825195, | |
| "learning_rate": 9.557777777777777e-06, | |
| "loss": 0.1939, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.0741289844329134, | |
| "grad_norm": 3.2049508094787598, | |
| "learning_rate": 9.502222222222223e-06, | |
| "loss": 0.1929, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.1111934766493698, | |
| "grad_norm": 3.9065611362457275, | |
| "learning_rate": 9.446666666666667e-06, | |
| "loss": 0.1998, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.1482579688658265, | |
| "grad_norm": 3.7471649646759033, | |
| "learning_rate": 9.391111111111111e-06, | |
| "loss": 0.2007, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.1853224610822832, | |
| "grad_norm": 3.952751874923706, | |
| "learning_rate": 9.335555555555557e-06, | |
| "loss": 0.1863, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.2223869532987397, | |
| "grad_norm": 5.39549446105957, | |
| "learning_rate": 9.280000000000001e-06, | |
| "loss": 0.1953, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.2594514455151964, | |
| "grad_norm": 4.03216552734375, | |
| "learning_rate": 9.224444444444445e-06, | |
| "loss": 0.2065, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.296515937731653, | |
| "grad_norm": 3.854651689529419, | |
| "learning_rate": 9.168888888888889e-06, | |
| "loss": 0.1703, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.3335804299481098, | |
| "grad_norm": 4.835360050201416, | |
| "learning_rate": 9.113333333333335e-06, | |
| "loss": 0.1692, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.3706449221645665, | |
| "grad_norm": 5.247130393981934, | |
| "learning_rate": 9.057777777777779e-06, | |
| "loss": 0.1982, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 1.407709414381023, | |
| "grad_norm": 3.9537737369537354, | |
| "learning_rate": 9.002222222222223e-06, | |
| "loss": 0.1661, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.4447739065974796, | |
| "grad_norm": 4.887810230255127, | |
| "learning_rate": 8.946666666666669e-06, | |
| "loss": 0.1836, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 1.4818383988139363, | |
| "grad_norm": 3.6338751316070557, | |
| "learning_rate": 8.891111111111111e-06, | |
| "loss": 0.1822, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.4818383988139363, | |
| "eval_loss": 0.2655850648880005, | |
| "eval_runtime": 730.9503, | |
| "eval_samples_per_second": 3.947, | |
| "eval_steps_per_second": 0.494, | |
| "eval_wer": 0.14449384404924762, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.5189028910303928, | |
| "grad_norm": 4.078255653381348, | |
| "learning_rate": 8.835555555555557e-06, | |
| "loss": 0.1661, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 1.5559673832468495, | |
| "grad_norm": 3.9311952590942383, | |
| "learning_rate": 8.78e-06, | |
| "loss": 0.1725, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.5930318754633062, | |
| "grad_norm": 4.800196170806885, | |
| "learning_rate": 8.724444444444445e-06, | |
| "loss": 0.1704, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 1.6300963676797626, | |
| "grad_norm": 4.550530910491943, | |
| "learning_rate": 8.66888888888889e-06, | |
| "loss": 0.1793, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.6671608598962195, | |
| "grad_norm": 6.508624076843262, | |
| "learning_rate": 8.613333333333333e-06, | |
| "loss": 0.1619, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 1.704225352112676, | |
| "grad_norm": 4.16792106628418, | |
| "learning_rate": 8.557777777777778e-06, | |
| "loss": 0.1652, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.7412898443291327, | |
| "grad_norm": 4.420657157897949, | |
| "learning_rate": 8.502222222222223e-06, | |
| "loss": 0.16, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 1.7783543365455894, | |
| "grad_norm": 4.781569004058838, | |
| "learning_rate": 8.446666666666668e-06, | |
| "loss": 0.1695, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.8154188287620459, | |
| "grad_norm": 3.877307176589966, | |
| "learning_rate": 8.391111111111112e-06, | |
| "loss": 0.1529, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 1.8524833209785025, | |
| "grad_norm": 4.159163475036621, | |
| "learning_rate": 8.335555555555556e-06, | |
| "loss": 0.1619, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.8895478131949592, | |
| "grad_norm": 3.6631579399108887, | |
| "learning_rate": 8.28e-06, | |
| "loss": 0.1654, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 1.9266123054114157, | |
| "grad_norm": 4.1784210205078125, | |
| "learning_rate": 8.224444444444444e-06, | |
| "loss": 0.1494, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.9636767976278726, | |
| "grad_norm": 5.867852210998535, | |
| "learning_rate": 8.16888888888889e-06, | |
| "loss": 0.1443, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 5.817214012145996, | |
| "learning_rate": 8.113333333333334e-06, | |
| "loss": 0.139, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.0370644922164565, | |
| "grad_norm": 2.3572022914886475, | |
| "learning_rate": 8.057777777777778e-06, | |
| "loss": 0.0614, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 2.0741289844329134, | |
| "grad_norm": 2.2769412994384766, | |
| "learning_rate": 8.002222222222222e-06, | |
| "loss": 0.0606, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.11119347664937, | |
| "grad_norm": 2.474583864212036, | |
| "learning_rate": 7.946666666666666e-06, | |
| "loss": 0.0716, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 2.1482579688658268, | |
| "grad_norm": 2.5783841609954834, | |
| "learning_rate": 7.891111111111112e-06, | |
| "loss": 0.065, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.1853224610822832, | |
| "grad_norm": 1.6132420301437378, | |
| "learning_rate": 7.835555555555556e-06, | |
| "loss": 0.067, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 2.2223869532987397, | |
| "grad_norm": 3.8042001724243164, | |
| "learning_rate": 7.78e-06, | |
| "loss": 0.0724, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.2594514455151966, | |
| "grad_norm": 2.2419843673706055, | |
| "learning_rate": 7.724444444444446e-06, | |
| "loss": 0.0761, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 2.296515937731653, | |
| "grad_norm": 2.706354856491089, | |
| "learning_rate": 7.66888888888889e-06, | |
| "loss": 0.0659, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.3335804299481095, | |
| "grad_norm": 2.8394265174865723, | |
| "learning_rate": 7.613333333333334e-06, | |
| "loss": 0.0688, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 2.3706449221645665, | |
| "grad_norm": 2.383784770965576, | |
| "learning_rate": 7.557777777777779e-06, | |
| "loss": 0.0729, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.407709414381023, | |
| "grad_norm": 3.0959832668304443, | |
| "learning_rate": 7.502222222222223e-06, | |
| "loss": 0.0626, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 2.4447739065974794, | |
| "grad_norm": 2.927393913269043, | |
| "learning_rate": 7.446666666666668e-06, | |
| "loss": 0.0677, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.4818383988139363, | |
| "grad_norm": 2.644434928894043, | |
| "learning_rate": 7.3911111111111125e-06, | |
| "loss": 0.0644, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 2.5189028910303928, | |
| "grad_norm": 2.9071755409240723, | |
| "learning_rate": 7.335555555555556e-06, | |
| "loss": 0.061, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.5559673832468492, | |
| "grad_norm": 2.6862034797668457, | |
| "learning_rate": 7.280000000000001e-06, | |
| "loss": 0.0615, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 2.593031875463306, | |
| "grad_norm": 3.1184046268463135, | |
| "learning_rate": 7.224444444444445e-06, | |
| "loss": 0.0714, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.6300963676797626, | |
| "grad_norm": 1.7592053413391113, | |
| "learning_rate": 7.1688888888888895e-06, | |
| "loss": 0.0704, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 2.6671608598962195, | |
| "grad_norm": 2.9316508769989014, | |
| "learning_rate": 7.113333333333334e-06, | |
| "loss": 0.0689, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.704225352112676, | |
| "grad_norm": 2.1934666633605957, | |
| "learning_rate": 7.057777777777778e-06, | |
| "loss": 0.0721, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 2.741289844329133, | |
| "grad_norm": 3.4919371604919434, | |
| "learning_rate": 7.0022222222222225e-06, | |
| "loss": 0.0638, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.7783543365455894, | |
| "grad_norm": 2.723252058029175, | |
| "learning_rate": 6.946666666666667e-06, | |
| "loss": 0.0598, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 2.815418828762046, | |
| "grad_norm": 1.8668267726898193, | |
| "learning_rate": 6.891111111111111e-06, | |
| "loss": 0.0607, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.8524833209785028, | |
| "grad_norm": 2.0989866256713867, | |
| "learning_rate": 6.835555555555556e-06, | |
| "loss": 0.0821, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 2.8895478131949592, | |
| "grad_norm": 2.9375364780426025, | |
| "learning_rate": 6.780000000000001e-06, | |
| "loss": 0.0636, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.9266123054114157, | |
| "grad_norm": 2.1375315189361572, | |
| "learning_rate": 6.724444444444444e-06, | |
| "loss": 0.0723, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 2.9636767976278726, | |
| "grad_norm": 2.5874264240264893, | |
| "learning_rate": 6.668888888888889e-06, | |
| "loss": 0.0706, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.9636767976278726, | |
| "eval_loss": 0.2490690052509308, | |
| "eval_runtime": 730.2087, | |
| "eval_samples_per_second": 3.951, | |
| "eval_steps_per_second": 0.494, | |
| "eval_wer": 0.12696648426812585, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 6.509148597717285, | |
| "learning_rate": 6.613333333333334e-06, | |
| "loss": 0.0587, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 3.0370644922164565, | |
| "grad_norm": 1.9590086936950684, | |
| "learning_rate": 6.557777777777778e-06, | |
| "loss": 0.0241, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 3.0741289844329134, | |
| "grad_norm": 1.4612740278244019, | |
| "learning_rate": 6.502222222222223e-06, | |
| "loss": 0.0267, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 3.11119347664937, | |
| "grad_norm": 0.9522780179977417, | |
| "learning_rate": 6.446666666666668e-06, | |
| "loss": 0.023, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 3.1482579688658268, | |
| "grad_norm": 1.891400694847107, | |
| "learning_rate": 6.391111111111111e-06, | |
| "loss": 0.0281, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 3.1853224610822832, | |
| "grad_norm": 1.0783302783966064, | |
| "learning_rate": 6.335555555555556e-06, | |
| "loss": 0.0246, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 3.2223869532987397, | |
| "grad_norm": 1.3504562377929688, | |
| "learning_rate": 6.280000000000001e-06, | |
| "loss": 0.0244, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 3.2594514455151966, | |
| "grad_norm": 1.8768439292907715, | |
| "learning_rate": 6.224444444444445e-06, | |
| "loss": 0.0264, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 3.296515937731653, | |
| "grad_norm": 1.5083887577056885, | |
| "learning_rate": 6.16888888888889e-06, | |
| "loss": 0.0248, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 3.3335804299481095, | |
| "grad_norm": 3.5768120288848877, | |
| "learning_rate": 6.113333333333333e-06, | |
| "loss": 0.0316, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 3.3706449221645665, | |
| "grad_norm": 1.1493444442749023, | |
| "learning_rate": 6.057777777777778e-06, | |
| "loss": 0.0294, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 3.407709414381023, | |
| "grad_norm": 2.3746306896209717, | |
| "learning_rate": 6.002222222222223e-06, | |
| "loss": 0.0263, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 3.4447739065974794, | |
| "grad_norm": 2.144634485244751, | |
| "learning_rate": 5.946666666666668e-06, | |
| "loss": 0.0348, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 3.4818383988139363, | |
| "grad_norm": 1.5002686977386475, | |
| "learning_rate": 5.891111111111112e-06, | |
| "loss": 0.0228, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 3.5189028910303928, | |
| "grad_norm": 1.6059187650680542, | |
| "learning_rate": 5.8355555555555565e-06, | |
| "loss": 0.0239, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 3.5559673832468492, | |
| "grad_norm": 2.757420778274536, | |
| "learning_rate": 5.78e-06, | |
| "loss": 0.0277, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 3.593031875463306, | |
| "grad_norm": 1.3977222442626953, | |
| "learning_rate": 5.724444444444445e-06, | |
| "loss": 0.0224, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 3.6300963676797626, | |
| "grad_norm": 1.9618048667907715, | |
| "learning_rate": 5.6688888888888895e-06, | |
| "loss": 0.026, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 3.6671608598962195, | |
| "grad_norm": 0.898245632648468, | |
| "learning_rate": 5.613333333333334e-06, | |
| "loss": 0.0326, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 3.704225352112676, | |
| "grad_norm": 1.8148616552352905, | |
| "learning_rate": 5.557777777777778e-06, | |
| "loss": 0.0213, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 3.741289844329133, | |
| "grad_norm": 1.308030366897583, | |
| "learning_rate": 5.5022222222222224e-06, | |
| "loss": 0.0192, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 3.7783543365455894, | |
| "grad_norm": 1.6680744886398315, | |
| "learning_rate": 5.4466666666666665e-06, | |
| "loss": 0.027, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 3.815418828762046, | |
| "grad_norm": 3.235917568206787, | |
| "learning_rate": 5.391111111111111e-06, | |
| "loss": 0.0242, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 3.8524833209785028, | |
| "grad_norm": 2.096780300140381, | |
| "learning_rate": 5.335555555555556e-06, | |
| "loss": 0.0243, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 3.8895478131949592, | |
| "grad_norm": 1.8445031642913818, | |
| "learning_rate": 5.28e-06, | |
| "loss": 0.024, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 3.9266123054114157, | |
| "grad_norm": 1.357937216758728, | |
| "learning_rate": 5.224444444444445e-06, | |
| "loss": 0.0244, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 3.9636767976278726, | |
| "grad_norm": 1.0413466691970825, | |
| "learning_rate": 5.168888888888889e-06, | |
| "loss": 0.0221, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 3.0572996139526367, | |
| "learning_rate": 5.113333333333333e-06, | |
| "loss": 0.0206, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 4.037064492216457, | |
| "grad_norm": 0.9961848258972168, | |
| "learning_rate": 5.057777777777778e-06, | |
| "loss": 0.0136, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 4.074128984432913, | |
| "grad_norm": 1.0248702764511108, | |
| "learning_rate": 5.002222222222223e-06, | |
| "loss": 0.009, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 4.11119347664937, | |
| "grad_norm": 0.6142157912254333, | |
| "learning_rate": 4.946666666666667e-06, | |
| "loss": 0.0113, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 4.148257968865827, | |
| "grad_norm": 0.27292531728744507, | |
| "learning_rate": 4.891111111111111e-06, | |
| "loss": 0.009, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 4.185322461082283, | |
| "grad_norm": 2.2906312942504883, | |
| "learning_rate": 4.835555555555556e-06, | |
| "loss": 0.0073, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 4.22238695329874, | |
| "grad_norm": 1.0498850345611572, | |
| "learning_rate": 4.78e-06, | |
| "loss": 0.0093, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 4.259451445515197, | |
| "grad_norm": 1.1574844121932983, | |
| "learning_rate": 4.724444444444445e-06, | |
| "loss": 0.0159, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 4.2965159377316535, | |
| "grad_norm": 0.7209671139717102, | |
| "learning_rate": 4.66888888888889e-06, | |
| "loss": 0.0088, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 4.3335804299481095, | |
| "grad_norm": 1.168841004371643, | |
| "learning_rate": 4.613333333333334e-06, | |
| "loss": 0.0094, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 4.3706449221645665, | |
| "grad_norm": 0.6153778433799744, | |
| "learning_rate": 4.557777777777778e-06, | |
| "loss": 0.009, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 4.407709414381023, | |
| "grad_norm": 1.5705232620239258, | |
| "learning_rate": 4.502222222222223e-06, | |
| "loss": 0.0085, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 4.444773906597479, | |
| "grad_norm": 0.24448032677173615, | |
| "learning_rate": 4.446666666666667e-06, | |
| "loss": 0.0072, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 4.444773906597479, | |
| "eval_loss": 0.27286583185195923, | |
| "eval_runtime": 739.8615, | |
| "eval_samples_per_second": 3.899, | |
| "eval_steps_per_second": 0.488, | |
| "eval_wer": 0.11913474692202462, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 4.481838398813936, | |
| "grad_norm": 1.2278587818145752, | |
| "learning_rate": 4.391111111111112e-06, | |
| "loss": 0.0146, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 4.518902891030393, | |
| "grad_norm": 0.6478213667869568, | |
| "learning_rate": 4.3355555555555565e-06, | |
| "loss": 0.014, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 4.555967383246849, | |
| "grad_norm": 0.7865190505981445, | |
| "learning_rate": 4.2800000000000005e-06, | |
| "loss": 0.0079, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 4.593031875463306, | |
| "grad_norm": 2.3078877925872803, | |
| "learning_rate": 4.2244444444444446e-06, | |
| "loss": 0.009, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 4.630096367679763, | |
| "grad_norm": 0.9625842571258545, | |
| "learning_rate": 4.168888888888889e-06, | |
| "loss": 0.0096, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 4.667160859896219, | |
| "grad_norm": 0.7619579434394836, | |
| "learning_rate": 4.1133333333333335e-06, | |
| "loss": 0.0096, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 4.704225352112676, | |
| "grad_norm": 1.5049270391464233, | |
| "learning_rate": 4.057777777777778e-06, | |
| "loss": 0.0099, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 4.741289844329133, | |
| "grad_norm": 1.1056573390960693, | |
| "learning_rate": 4.002222222222222e-06, | |
| "loss": 0.0065, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 4.778354336545589, | |
| "grad_norm": 0.7983392477035522, | |
| "learning_rate": 3.946666666666667e-06, | |
| "loss": 0.0105, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 4.815418828762046, | |
| "grad_norm": 1.1153795719146729, | |
| "learning_rate": 3.891111111111111e-06, | |
| "loss": 0.0075, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 4.852483320978503, | |
| "grad_norm": 0.9730608463287354, | |
| "learning_rate": 3.835555555555555e-06, | |
| "loss": 0.0087, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 4.889547813194959, | |
| "grad_norm": 0.5694206953048706, | |
| "learning_rate": 3.7800000000000002e-06, | |
| "loss": 0.0071, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 4.926612305411416, | |
| "grad_norm": 0.2520028352737427, | |
| "learning_rate": 3.724444444444445e-06, | |
| "loss": 0.0081, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 4.963676797627873, | |
| "grad_norm": 0.436355322599411, | |
| "learning_rate": 3.668888888888889e-06, | |
| "loss": 0.0078, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.798361599445343, | |
| "learning_rate": 3.6133333333333336e-06, | |
| "loss": 0.0075, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 5.037064492216457, | |
| "grad_norm": 1.3702267408370972, | |
| "learning_rate": 3.5577777777777785e-06, | |
| "loss": 0.005, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 5.074128984432913, | |
| "grad_norm": 0.2790464162826538, | |
| "learning_rate": 3.5022222222222225e-06, | |
| "loss": 0.0032, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 5.11119347664937, | |
| "grad_norm": 0.15111476182937622, | |
| "learning_rate": 3.446666666666667e-06, | |
| "loss": 0.0046, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 5.148257968865827, | |
| "grad_norm": 0.09985285252332687, | |
| "learning_rate": 3.391111111111111e-06, | |
| "loss": 0.0035, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 5.185322461082283, | |
| "grad_norm": 0.5352105498313904, | |
| "learning_rate": 3.335555555555556e-06, | |
| "loss": 0.0031, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 5.22238695329874, | |
| "grad_norm": 0.9406213760375977, | |
| "learning_rate": 3.2800000000000004e-06, | |
| "loss": 0.0035, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 5.259451445515197, | |
| "grad_norm": 0.7073507905006409, | |
| "learning_rate": 3.2244444444444444e-06, | |
| "loss": 0.0035, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 5.2965159377316535, | |
| "grad_norm": 0.07916448265314102, | |
| "learning_rate": 3.1688888888888893e-06, | |
| "loss": 0.0035, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 5.3335804299481095, | |
| "grad_norm": 0.5285120606422424, | |
| "learning_rate": 3.1133333333333337e-06, | |
| "loss": 0.0027, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 5.3706449221645665, | |
| "grad_norm": 0.09832775592803955, | |
| "learning_rate": 3.0577777777777778e-06, | |
| "loss": 0.0036, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 5.407709414381023, | |
| "grad_norm": 0.21083103120326996, | |
| "learning_rate": 3.0022222222222227e-06, | |
| "loss": 0.0041, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 5.444773906597479, | |
| "grad_norm": 0.6747980713844299, | |
| "learning_rate": 2.946666666666667e-06, | |
| "loss": 0.003, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 5.481838398813936, | |
| "grad_norm": 0.5111549496650696, | |
| "learning_rate": 2.891111111111111e-06, | |
| "loss": 0.0028, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 5.518902891030393, | |
| "grad_norm": 0.6502516269683838, | |
| "learning_rate": 2.835555555555556e-06, | |
| "loss": 0.0045, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 5.555967383246849, | |
| "grad_norm": 0.4688964784145355, | |
| "learning_rate": 2.7800000000000005e-06, | |
| "loss": 0.0036, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 5.593031875463306, | |
| "grad_norm": 0.281994104385376, | |
| "learning_rate": 2.7244444444444445e-06, | |
| "loss": 0.0021, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 5.630096367679763, | |
| "grad_norm": 0.11583279073238373, | |
| "learning_rate": 2.6688888888888894e-06, | |
| "loss": 0.0041, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 5.667160859896219, | |
| "grad_norm": 0.22941534221172333, | |
| "learning_rate": 2.6133333333333334e-06, | |
| "loss": 0.0022, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 5.704225352112676, | |
| "grad_norm": 0.13950073719024658, | |
| "learning_rate": 2.557777777777778e-06, | |
| "loss": 0.003, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 5.741289844329133, | |
| "grad_norm": 0.6869206428527832, | |
| "learning_rate": 2.5022222222222224e-06, | |
| "loss": 0.0024, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 5.778354336545589, | |
| "grad_norm": 0.09893081337213516, | |
| "learning_rate": 2.446666666666667e-06, | |
| "loss": 0.0029, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 5.815418828762046, | |
| "grad_norm": 0.1264762133359909, | |
| "learning_rate": 2.3911111111111113e-06, | |
| "loss": 0.0033, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 5.852483320978503, | |
| "grad_norm": 0.15489889681339264, | |
| "learning_rate": 2.3355555555555557e-06, | |
| "loss": 0.003, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 5.889547813194959, | |
| "grad_norm": 0.5875250697135925, | |
| "learning_rate": 2.28e-06, | |
| "loss": 0.0022, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 5.926612305411416, | |
| "grad_norm": 0.06691984087228775, | |
| "learning_rate": 2.2244444444444447e-06, | |
| "loss": 0.005, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 5.926612305411416, | |
| "eval_loss": 0.28099098801612854, | |
| "eval_runtime": 734.9707, | |
| "eval_samples_per_second": 3.925, | |
| "eval_steps_per_second": 0.491, | |
| "eval_wer": 0.11566347469220246, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 5.963676797627873, | |
| "grad_norm": 0.2645249664783478, | |
| "learning_rate": 2.168888888888889e-06, | |
| "loss": 0.0026, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.3361597955226898, | |
| "learning_rate": 2.1133333333333336e-06, | |
| "loss": 0.0023, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 6.037064492216457, | |
| "grad_norm": 0.059147898107767105, | |
| "learning_rate": 2.057777777777778e-06, | |
| "loss": 0.0015, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 6.074128984432913, | |
| "grad_norm": 0.1158735603094101, | |
| "learning_rate": 2.0022222222222225e-06, | |
| "loss": 0.0016, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 6.11119347664937, | |
| "grad_norm": 1.3564985990524292, | |
| "learning_rate": 1.9466666666666665e-06, | |
| "loss": 0.0014, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 6.148257968865827, | |
| "grad_norm": 0.5956087112426758, | |
| "learning_rate": 1.8911111111111114e-06, | |
| "loss": 0.0018, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 6.185322461082283, | |
| "grad_norm": 0.09224885702133179, | |
| "learning_rate": 1.8355555555555557e-06, | |
| "loss": 0.0017, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 6.22238695329874, | |
| "grad_norm": 0.06868930906057358, | |
| "learning_rate": 1.7800000000000001e-06, | |
| "loss": 0.0017, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 6.259451445515197, | |
| "grad_norm": 0.06657718122005463, | |
| "learning_rate": 1.7244444444444448e-06, | |
| "loss": 0.0014, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 6.2965159377316535, | |
| "grad_norm": 0.05459928885102272, | |
| "learning_rate": 1.668888888888889e-06, | |
| "loss": 0.0017, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 6.3335804299481095, | |
| "grad_norm": 0.05795517563819885, | |
| "learning_rate": 1.6133333333333335e-06, | |
| "loss": 0.0027, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 6.3706449221645665, | |
| "grad_norm": 0.06204914301633835, | |
| "learning_rate": 1.5577777777777777e-06, | |
| "loss": 0.0012, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 6.407709414381023, | |
| "grad_norm": 0.0820712074637413, | |
| "learning_rate": 1.5022222222222224e-06, | |
| "loss": 0.0012, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 6.444773906597479, | |
| "grad_norm": 0.056523606181144714, | |
| "learning_rate": 1.4466666666666669e-06, | |
| "loss": 0.0013, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 6.481838398813936, | |
| "grad_norm": 0.07985592633485794, | |
| "learning_rate": 1.3911111111111111e-06, | |
| "loss": 0.0014, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 6.518902891030393, | |
| "grad_norm": 0.044111426919698715, | |
| "learning_rate": 1.3355555555555558e-06, | |
| "loss": 0.0012, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 6.555967383246849, | |
| "grad_norm": 0.05683915689587593, | |
| "learning_rate": 1.28e-06, | |
| "loss": 0.0014, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 6.593031875463306, | |
| "grad_norm": 0.08568093180656433, | |
| "learning_rate": 1.2244444444444445e-06, | |
| "loss": 0.0012, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 6.630096367679763, | |
| "grad_norm": 0.054062824696302414, | |
| "learning_rate": 1.168888888888889e-06, | |
| "loss": 0.0011, | |
| "step": 4475 | |
| }, | |
| { | |
| "epoch": 6.667160859896219, | |
| "grad_norm": 0.0509476363658905, | |
| "learning_rate": 1.1133333333333334e-06, | |
| "loss": 0.0013, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 6.704225352112676, | |
| "grad_norm": 0.04927874356508255, | |
| "learning_rate": 1.0577777777777779e-06, | |
| "loss": 0.0012, | |
| "step": 4525 | |
| }, | |
| { | |
| "epoch": 6.741289844329133, | |
| "grad_norm": 0.08598697185516357, | |
| "learning_rate": 1.0022222222222223e-06, | |
| "loss": 0.0011, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 6.778354336545589, | |
| "grad_norm": 0.3571934700012207, | |
| "learning_rate": 9.466666666666667e-07, | |
| "loss": 0.0016, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 6.815418828762046, | |
| "grad_norm": 0.05977300554513931, | |
| "learning_rate": 8.911111111111112e-07, | |
| "loss": 0.001, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 6.852483320978503, | |
| "grad_norm": 0.05966237559914589, | |
| "learning_rate": 8.355555555555556e-07, | |
| "loss": 0.001, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 6.889547813194959, | |
| "grad_norm": 0.05432112514972687, | |
| "learning_rate": 7.8e-07, | |
| "loss": 0.001, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 6.926612305411416, | |
| "grad_norm": 0.06741122156381607, | |
| "learning_rate": 7.244444444444446e-07, | |
| "loss": 0.0019, | |
| "step": 4675 | |
| }, | |
| { | |
| "epoch": 6.963676797627873, | |
| "grad_norm": 0.04723643884062767, | |
| "learning_rate": 6.68888888888889e-07, | |
| "loss": 0.0012, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.07329325377941132, | |
| "learning_rate": 6.133333333333333e-07, | |
| "loss": 0.001, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 7.037064492216457, | |
| "grad_norm": 0.06389188766479492, | |
| "learning_rate": 5.577777777777779e-07, | |
| "loss": 0.001, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 7.074128984432913, | |
| "grad_norm": 0.03797365352511406, | |
| "learning_rate": 5.022222222222222e-07, | |
| "loss": 0.001, | |
| "step": 4775 | |
| }, | |
| { | |
| "epoch": 7.11119347664937, | |
| "grad_norm": 0.04686768725514412, | |
| "learning_rate": 4.466666666666667e-07, | |
| "loss": 0.0009, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 7.148257968865827, | |
| "grad_norm": 0.06883518397808075, | |
| "learning_rate": 3.9111111111111115e-07, | |
| "loss": 0.001, | |
| "step": 4825 | |
| }, | |
| { | |
| "epoch": 7.185322461082283, | |
| "grad_norm": 0.02842629700899124, | |
| "learning_rate": 3.3555555555555556e-07, | |
| "loss": 0.0009, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 7.22238695329874, | |
| "grad_norm": 0.04749394953250885, | |
| "learning_rate": 2.8e-07, | |
| "loss": 0.001, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 7.259451445515197, | |
| "grad_norm": 0.04491546377539635, | |
| "learning_rate": 2.2444444444444445e-07, | |
| "loss": 0.001, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 7.2965159377316535, | |
| "grad_norm": 0.056013334542512894, | |
| "learning_rate": 1.6888888888888888e-07, | |
| "loss": 0.001, | |
| "step": 4925 | |
| }, | |
| { | |
| "epoch": 7.3335804299481095, | |
| "grad_norm": 0.057778194546699524, | |
| "learning_rate": 1.1333333333333336e-07, | |
| "loss": 0.0011, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 7.3706449221645665, | |
| "grad_norm": 0.051241885870695114, | |
| "learning_rate": 5.777777777777778e-08, | |
| "loss": 0.0011, | |
| "step": 4975 | |
| }, | |
| { | |
| "epoch": 7.407709414381023, | |
| "grad_norm": 0.06301814317703247, | |
| "learning_rate": 2.2222222222222225e-09, | |
| "loss": 0.0009, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 7.407709414381023, | |
| "eval_loss": 0.29011788964271545, | |
| "eval_runtime": 732.4342, | |
| "eval_samples_per_second": 3.939, | |
| "eval_steps_per_second": 0.493, | |
| "eval_wer": 0.1146545827633379, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 7.407709414381023, | |
| "step": 5000, | |
| "total_flos": 8.155551755501568e+19, | |
| "train_loss": 0.10907779041565954, | |
| "train_runtime": 12394.4337, | |
| "train_samples_per_second": 6.455, | |
| "train_steps_per_second": 0.403 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 5000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 8, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.155551755501568e+19, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |