| { |
| "best_metric": 81.62187647336162, |
| "best_model_checkpoint": "./iteboshi_student_model_temp/checkpoint-19000", |
| "epoch": 22.026431718061673, |
| "eval_steps": 1000, |
| "global_step": 20000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02753303964757709, |
| "grad_norm": 4.4843268394470215, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 11.1906, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.05506607929515418, |
| "grad_norm": 2.329829692840576, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 10.3963, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.08259911894273128, |
| "grad_norm": 2.1956491470336914, |
| "learning_rate": 3e-06, |
| "loss": 8.582, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.11013215859030837, |
| "grad_norm": 1.6026716232299805, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 6.6173, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.13766519823788545, |
| "grad_norm": 2.139051675796509, |
| "learning_rate": 5e-06, |
| "loss": 5.5439, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.16519823788546256, |
| "grad_norm": 1.4878082275390625, |
| "learning_rate": 6e-06, |
| "loss": 4.606, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.19273127753303965, |
| "grad_norm": 1.5405058860778809, |
| "learning_rate": 7e-06, |
| "loss": 4.2556, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.22026431718061673, |
| "grad_norm": 1.7798066139221191, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 4.1084, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.24779735682819384, |
| "grad_norm": 1.461482048034668, |
| "learning_rate": 9e-06, |
| "loss": 3.9714, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.2753303964757709, |
| "grad_norm": 1.7320806980133057, |
| "learning_rate": 1e-05, |
| "loss": 3.8459, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.30286343612334804, |
| "grad_norm": 1.756719708442688, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 3.7018, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.3303964757709251, |
| "grad_norm": 1.5017330646514893, |
| "learning_rate": 1.2e-05, |
| "loss": 3.5979, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.3579295154185022, |
| "grad_norm": 1.5448871850967407, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 3.495, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.3854625550660793, |
| "grad_norm": 1.4887356758117676, |
| "learning_rate": 1.4e-05, |
| "loss": 3.4151, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.4129955947136564, |
| "grad_norm": 1.64043390750885, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 3.3033, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.44052863436123346, |
| "grad_norm": 1.9127826690673828, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 3.134, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.46806167400881055, |
| "grad_norm": 1.5309818983078003, |
| "learning_rate": 1.7e-05, |
| "loss": 3.0402, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.4955947136563877, |
| "grad_norm": 1.752582311630249, |
| "learning_rate": 1.8e-05, |
| "loss": 2.9363, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.5231277533039648, |
| "grad_norm": 1.5565321445465088, |
| "learning_rate": 1.9e-05, |
| "loss": 2.7852, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.5506607929515418, |
| "grad_norm": 1.5152559280395508, |
| "learning_rate": 2e-05, |
| "loss": 2.587, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5781938325991189, |
| "grad_norm": 1.79190993309021, |
| "learning_rate": 1.9974358974358975e-05, |
| "loss": 2.46, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.6057268722466961, |
| "grad_norm": 1.7385997772216797, |
| "learning_rate": 1.994871794871795e-05, |
| "loss": 2.2762, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.6332599118942731, |
| "grad_norm": 1.6728637218475342, |
| "learning_rate": 1.9923076923076926e-05, |
| "loss": 2.1675, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.6607929515418502, |
| "grad_norm": 1.6606602668762207, |
| "learning_rate": 1.98974358974359e-05, |
| "loss": 2.0062, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6883259911894273, |
| "grad_norm": 1.6768505573272705, |
| "learning_rate": 1.9871794871794873e-05, |
| "loss": 1.9047, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.7158590308370044, |
| "grad_norm": 2.253502130508423, |
| "learning_rate": 1.9846153846153847e-05, |
| "loss": 1.8625, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.7433920704845814, |
| "grad_norm": 2.384761333465576, |
| "learning_rate": 1.9820512820512824e-05, |
| "loss": 1.7707, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.7709251101321586, |
| "grad_norm": 1.652563452720642, |
| "learning_rate": 1.9794871794871798e-05, |
| "loss": 1.6815, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7984581497797357, |
| "grad_norm": 2.095320224761963, |
| "learning_rate": 1.976923076923077e-05, |
| "loss": 1.6741, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.8259911894273128, |
| "grad_norm": 1.6573553085327148, |
| "learning_rate": 1.9743589743589745e-05, |
| "loss": 1.6342, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.8535242290748899, |
| "grad_norm": 2.439465045928955, |
| "learning_rate": 1.9717948717948722e-05, |
| "loss": 1.5066, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.8810572687224669, |
| "grad_norm": 2.218928575515747, |
| "learning_rate": 1.9692307692307696e-05, |
| "loss": 1.508, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.9085903083700441, |
| "grad_norm": 1.4493534564971924, |
| "learning_rate": 1.9666666666666666e-05, |
| "loss": 1.3678, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.9361233480176211, |
| "grad_norm": 1.3857249021530151, |
| "learning_rate": 1.9641025641025643e-05, |
| "loss": 1.372, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.9636563876651982, |
| "grad_norm": 1.2312829494476318, |
| "learning_rate": 1.9615384615384617e-05, |
| "loss": 1.3583, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.9911894273127754, |
| "grad_norm": 1.7964283227920532, |
| "learning_rate": 1.958974358974359e-05, |
| "loss": 1.3459, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.0187224669603525, |
| "grad_norm": 1.721659779548645, |
| "learning_rate": 1.9564102564102564e-05, |
| "loss": 1.2279, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.0462555066079295, |
| "grad_norm": 1.5802332162857056, |
| "learning_rate": 1.953846153846154e-05, |
| "loss": 1.1097, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.0737885462555066, |
| "grad_norm": 1.7199921607971191, |
| "learning_rate": 1.9512820512820515e-05, |
| "loss": 1.1012, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.1013215859030836, |
| "grad_norm": 1.5837390422821045, |
| "learning_rate": 1.9487179487179488e-05, |
| "loss": 1.0877, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.1013215859030836, |
| "eval_cer": 54.04090195224285, |
| "eval_loss": 1.2933422327041626, |
| "eval_runtime": 1360.2269, |
| "eval_samples_per_second": 7.779, |
| "eval_steps_per_second": 1.945, |
| "eval_wer": 98.37812352663839, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.1288546255506609, |
| "grad_norm": 1.3139415979385376, |
| "learning_rate": 1.9461538461538462e-05, |
| "loss": 1.0589, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.1563876651982379, |
| "grad_norm": 1.2565332651138306, |
| "learning_rate": 1.943589743589744e-05, |
| "loss": 1.0856, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.183920704845815, |
| "grad_norm": 1.5922749042510986, |
| "learning_rate": 1.9410256410256413e-05, |
| "loss": 1.0407, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.2114537444933922, |
| "grad_norm": 1.555267572402954, |
| "learning_rate": 1.9384615384615386e-05, |
| "loss": 1.0012, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.2389867841409692, |
| "grad_norm": 1.5344181060791016, |
| "learning_rate": 1.935897435897436e-05, |
| "loss": 1.0704, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.2665198237885462, |
| "grad_norm": 1.516708493232727, |
| "learning_rate": 1.9333333333333333e-05, |
| "loss": 0.9818, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.2940528634361232, |
| "grad_norm": 1.461151123046875, |
| "learning_rate": 1.930769230769231e-05, |
| "loss": 0.9635, |
| "step": 1175 |
| }, |
| { |
| "epoch": 1.3215859030837005, |
| "grad_norm": 1.800354242324829, |
| "learning_rate": 1.9282051282051284e-05, |
| "loss": 0.9683, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.3491189427312775, |
| "grad_norm": 1.3849306106567383, |
| "learning_rate": 1.9256410256410258e-05, |
| "loss": 0.9675, |
| "step": 1225 |
| }, |
| { |
| "epoch": 1.3766519823788546, |
| "grad_norm": 1.376198172569275, |
| "learning_rate": 1.923076923076923e-05, |
| "loss": 0.986, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.4041850220264318, |
| "grad_norm": 3.222456693649292, |
| "learning_rate": 1.920512820512821e-05, |
| "loss": 0.9326, |
| "step": 1275 |
| }, |
| { |
| "epoch": 1.4317180616740088, |
| "grad_norm": 1.2251626253128052, |
| "learning_rate": 1.9179487179487182e-05, |
| "loss": 0.915, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.4592511013215859, |
| "grad_norm": 1.2199984788894653, |
| "learning_rate": 1.9153846153846156e-05, |
| "loss": 0.9763, |
| "step": 1325 |
| }, |
| { |
| "epoch": 1.4867841409691631, |
| "grad_norm": 1.6957029104232788, |
| "learning_rate": 1.912820512820513e-05, |
| "loss": 0.9419, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.51431718061674, |
| "grad_norm": 1.2903211116790771, |
| "learning_rate": 1.9102564102564106e-05, |
| "loss": 0.9087, |
| "step": 1375 |
| }, |
| { |
| "epoch": 1.5418502202643172, |
| "grad_norm": 1.2986080646514893, |
| "learning_rate": 1.907692307692308e-05, |
| "loss": 0.9325, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.5693832599118944, |
| "grad_norm": 1.4618104696273804, |
| "learning_rate": 1.905128205128205e-05, |
| "loss": 0.9184, |
| "step": 1425 |
| }, |
| { |
| "epoch": 1.5969162995594712, |
| "grad_norm": 2.7148098945617676, |
| "learning_rate": 1.9025641025641027e-05, |
| "loss": 0.9187, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.6244493392070485, |
| "grad_norm": 1.289120078086853, |
| "learning_rate": 1.9e-05, |
| "loss": 0.8714, |
| "step": 1475 |
| }, |
| { |
| "epoch": 1.6519823788546255, |
| "grad_norm": 1.3392589092254639, |
| "learning_rate": 1.8974358974358975e-05, |
| "loss": 0.8392, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.6795154185022025, |
| "grad_norm": 1.1385656595230103, |
| "learning_rate": 1.894871794871795e-05, |
| "loss": 0.8515, |
| "step": 1525 |
| }, |
| { |
| "epoch": 1.7070484581497798, |
| "grad_norm": 1.4275093078613281, |
| "learning_rate": 1.8923076923076925e-05, |
| "loss": 0.8418, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.7345814977973568, |
| "grad_norm": 1.6075072288513184, |
| "learning_rate": 1.88974358974359e-05, |
| "loss": 0.8379, |
| "step": 1575 |
| }, |
| { |
| "epoch": 1.7621145374449338, |
| "grad_norm": 1.2246577739715576, |
| "learning_rate": 1.8871794871794873e-05, |
| "loss": 0.8321, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.789647577092511, |
| "grad_norm": 1.1842765808105469, |
| "learning_rate": 1.8846153846153846e-05, |
| "loss": 0.8636, |
| "step": 1625 |
| }, |
| { |
| "epoch": 1.8171806167400881, |
| "grad_norm": 2.2065274715423584, |
| "learning_rate": 1.8820512820512823e-05, |
| "loss": 0.8869, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.8447136563876652, |
| "grad_norm": 1.0761529207229614, |
| "learning_rate": 1.8794871794871797e-05, |
| "loss": 0.8663, |
| "step": 1675 |
| }, |
| { |
| "epoch": 1.8722466960352424, |
| "grad_norm": 1.3553169965744019, |
| "learning_rate": 1.876923076923077e-05, |
| "loss": 0.8197, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.8997797356828194, |
| "grad_norm": 1.3320152759552002, |
| "learning_rate": 1.8743589743589744e-05, |
| "loss": 0.8146, |
| "step": 1725 |
| }, |
| { |
| "epoch": 1.9273127753303965, |
| "grad_norm": 1.7702279090881348, |
| "learning_rate": 1.8717948717948718e-05, |
| "loss": 0.8255, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.9548458149779737, |
| "grad_norm": 1.497201681137085, |
| "learning_rate": 1.8692307692307695e-05, |
| "loss": 0.81, |
| "step": 1775 |
| }, |
| { |
| "epoch": 1.9823788546255505, |
| "grad_norm": 1.082921028137207, |
| "learning_rate": 1.866666666666667e-05, |
| "loss": 0.7969, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.0099118942731278, |
| "grad_norm": 0.8988214135169983, |
| "learning_rate": 1.8641025641025642e-05, |
| "loss": 0.7524, |
| "step": 1825 |
| }, |
| { |
| "epoch": 2.037444933920705, |
| "grad_norm": 1.0440396070480347, |
| "learning_rate": 1.8615384615384616e-05, |
| "loss": 0.5941, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.064977973568282, |
| "grad_norm": 1.1343231201171875, |
| "learning_rate": 1.8589743589743593e-05, |
| "loss": 0.5931, |
| "step": 1875 |
| }, |
| { |
| "epoch": 2.092511013215859, |
| "grad_norm": 0.8273594379425049, |
| "learning_rate": 1.8564102564102567e-05, |
| "loss": 0.5839, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.1200440528634363, |
| "grad_norm": 1.1608532667160034, |
| "learning_rate": 1.853846153846154e-05, |
| "loss": 0.6335, |
| "step": 1925 |
| }, |
| { |
| "epoch": 2.147577092511013, |
| "grad_norm": 1.0717401504516602, |
| "learning_rate": 1.8512820512820514e-05, |
| "loss": 0.5711, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.1751101321585904, |
| "grad_norm": 1.2120345830917358, |
| "learning_rate": 1.848717948717949e-05, |
| "loss": 0.5823, |
| "step": 1975 |
| }, |
| { |
| "epoch": 2.202643171806167, |
| "grad_norm": 0.9634266495704651, |
| "learning_rate": 1.8461538461538465e-05, |
| "loss": 0.6047, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.202643171806167, |
| "eval_cer": 34.19462817970263, |
| "eval_loss": 0.8662471175193787, |
| "eval_runtime": 1317.6626, |
| "eval_samples_per_second": 8.03, |
| "eval_steps_per_second": 2.008, |
| "eval_wer": 91.66430928807166, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.2301762114537445, |
| "grad_norm": 2.5108625888824463, |
| "learning_rate": 1.8435897435897435e-05, |
| "loss": 0.5885, |
| "step": 2025 |
| }, |
| { |
| "epoch": 2.2577092511013217, |
| "grad_norm": 1.1990822553634644, |
| "learning_rate": 1.8410256410256412e-05, |
| "loss": 0.5913, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.2852422907488985, |
| "grad_norm": 0.9576376080513, |
| "learning_rate": 1.8384615384615386e-05, |
| "loss": 0.5648, |
| "step": 2075 |
| }, |
| { |
| "epoch": 2.3127753303964758, |
| "grad_norm": 2.0012855529785156, |
| "learning_rate": 1.835897435897436e-05, |
| "loss": 0.5829, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.340308370044053, |
| "grad_norm": 1.3808636665344238, |
| "learning_rate": 1.8333333333333333e-05, |
| "loss": 0.5258, |
| "step": 2125 |
| }, |
| { |
| "epoch": 2.36784140969163, |
| "grad_norm": 0.9106646776199341, |
| "learning_rate": 1.830769230769231e-05, |
| "loss": 0.5541, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.395374449339207, |
| "grad_norm": 0.9727596640586853, |
| "learning_rate": 1.8282051282051284e-05, |
| "loss": 0.5608, |
| "step": 2175 |
| }, |
| { |
| "epoch": 2.4229074889867843, |
| "grad_norm": 1.2104076147079468, |
| "learning_rate": 1.8256410256410257e-05, |
| "loss": 0.5298, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.450440528634361, |
| "grad_norm": 0.9904928803443909, |
| "learning_rate": 1.823076923076923e-05, |
| "loss": 0.5236, |
| "step": 2225 |
| }, |
| { |
| "epoch": 2.4779735682819384, |
| "grad_norm": 1.0200227499008179, |
| "learning_rate": 1.8205128205128208e-05, |
| "loss": 0.556, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.505506607929515, |
| "grad_norm": 1.2327479124069214, |
| "learning_rate": 1.817948717948718e-05, |
| "loss": 0.6067, |
| "step": 2275 |
| }, |
| { |
| "epoch": 2.5330396475770924, |
| "grad_norm": 0.8597280383110046, |
| "learning_rate": 1.8153846153846155e-05, |
| "loss": 0.5704, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.5605726872246697, |
| "grad_norm": 1.0720624923706055, |
| "learning_rate": 1.812820512820513e-05, |
| "loss": 0.5499, |
| "step": 2325 |
| }, |
| { |
| "epoch": 2.5881057268722465, |
| "grad_norm": 0.8711467981338501, |
| "learning_rate": 1.8102564102564102e-05, |
| "loss": 0.5539, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.6156387665198237, |
| "grad_norm": 0.9706287384033203, |
| "learning_rate": 1.807692307692308e-05, |
| "loss": 0.5647, |
| "step": 2375 |
| }, |
| { |
| "epoch": 2.643171806167401, |
| "grad_norm": 0.9005234241485596, |
| "learning_rate": 1.8051282051282053e-05, |
| "loss": 0.5261, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.670704845814978, |
| "grad_norm": 0.9805070161819458, |
| "learning_rate": 1.8025641025641027e-05, |
| "loss": 0.5165, |
| "step": 2425 |
| }, |
| { |
| "epoch": 2.698237885462555, |
| "grad_norm": 1.09579598903656, |
| "learning_rate": 1.8e-05, |
| "loss": 0.5761, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.7257709251101323, |
| "grad_norm": 1.0162299871444702, |
| "learning_rate": 1.7974358974358977e-05, |
| "loss": 0.5228, |
| "step": 2475 |
| }, |
| { |
| "epoch": 2.753303964757709, |
| "grad_norm": 1.1346451044082642, |
| "learning_rate": 1.794871794871795e-05, |
| "loss": 0.5937, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.7808370044052864, |
| "grad_norm": 1.3441213369369507, |
| "learning_rate": 1.7923076923076925e-05, |
| "loss": 0.5553, |
| "step": 2525 |
| }, |
| { |
| "epoch": 2.8083700440528636, |
| "grad_norm": 1.1649460792541504, |
| "learning_rate": 1.78974358974359e-05, |
| "loss": 0.507, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.8359030837004404, |
| "grad_norm": 0.9251694083213806, |
| "learning_rate": 1.7871794871794875e-05, |
| "loss": 0.5293, |
| "step": 2575 |
| }, |
| { |
| "epoch": 2.8634361233480177, |
| "grad_norm": 1.4110281467437744, |
| "learning_rate": 1.784615384615385e-05, |
| "loss": 0.5654, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.890969162995595, |
| "grad_norm": 1.63406240940094, |
| "learning_rate": 1.7820512820512823e-05, |
| "loss": 0.5154, |
| "step": 2625 |
| }, |
| { |
| "epoch": 2.9185022026431717, |
| "grad_norm": 1.6731914281845093, |
| "learning_rate": 1.7794871794871796e-05, |
| "loss": 0.5209, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.946035242290749, |
| "grad_norm": 0.8957790732383728, |
| "learning_rate": 1.776923076923077e-05, |
| "loss": 0.5275, |
| "step": 2675 |
| }, |
| { |
| "epoch": 2.9735682819383262, |
| "grad_norm": 1.0544830560684204, |
| "learning_rate": 1.7743589743589744e-05, |
| "loss": 0.5149, |
| "step": 2700 |
| }, |
| { |
| "epoch": 3.001101321585903, |
| "grad_norm": 1.1973352432250977, |
| "learning_rate": 1.7717948717948717e-05, |
| "loss": 0.5632, |
| "step": 2725 |
| }, |
| { |
| "epoch": 3.0286343612334803, |
| "grad_norm": 0.9459331035614014, |
| "learning_rate": 1.7692307692307694e-05, |
| "loss": 0.3906, |
| "step": 2750 |
| }, |
| { |
| "epoch": 3.056167400881057, |
| "grad_norm": 0.7281056642532349, |
| "learning_rate": 1.7666666666666668e-05, |
| "loss": 0.3398, |
| "step": 2775 |
| }, |
| { |
| "epoch": 3.0837004405286343, |
| "grad_norm": 0.9286469221115112, |
| "learning_rate": 1.7641025641025642e-05, |
| "loss": 0.3603, |
| "step": 2800 |
| }, |
| { |
| "epoch": 3.1112334801762116, |
| "grad_norm": 0.6543312072753906, |
| "learning_rate": 1.7615384615384615e-05, |
| "loss": 0.348, |
| "step": 2825 |
| }, |
| { |
| "epoch": 3.1387665198237884, |
| "grad_norm": 0.852942168712616, |
| "learning_rate": 1.7589743589743592e-05, |
| "loss": 0.3527, |
| "step": 2850 |
| }, |
| { |
| "epoch": 3.1662995594713657, |
| "grad_norm": 1.0175626277923584, |
| "learning_rate": 1.7564102564102566e-05, |
| "loss": 0.3779, |
| "step": 2875 |
| }, |
| { |
| "epoch": 3.193832599118943, |
| "grad_norm": 0.9942947030067444, |
| "learning_rate": 1.753846153846154e-05, |
| "loss": 0.3732, |
| "step": 2900 |
| }, |
| { |
| "epoch": 3.2213656387665197, |
| "grad_norm": 0.713638186454773, |
| "learning_rate": 1.7512820512820513e-05, |
| "loss": 0.3757, |
| "step": 2925 |
| }, |
| { |
| "epoch": 3.248898678414097, |
| "grad_norm": 0.9049544334411621, |
| "learning_rate": 1.7487179487179487e-05, |
| "loss": 0.3769, |
| "step": 2950 |
| }, |
| { |
| "epoch": 3.2764317180616738, |
| "grad_norm": 0.7445310354232788, |
| "learning_rate": 1.7461538461538464e-05, |
| "loss": 0.361, |
| "step": 2975 |
| }, |
| { |
| "epoch": 3.303964757709251, |
| "grad_norm": 0.6536186933517456, |
| "learning_rate": 1.7435897435897438e-05, |
| "loss": 0.3394, |
| "step": 3000 |
| }, |
| { |
| "epoch": 3.303964757709251, |
| "eval_cer": 34.55343734595853, |
| "eval_loss": 0.7762519717216492, |
| "eval_runtime": 1343.0708, |
| "eval_samples_per_second": 7.878, |
| "eval_steps_per_second": 1.97, |
| "eval_wer": 88.5997171145686, |
| "step": 3000 |
| }, |
| { |
| "epoch": 3.3314977973568283, |
| "grad_norm": 0.820868194103241, |
| "learning_rate": 1.741025641025641e-05, |
| "loss": 0.3648, |
| "step": 3025 |
| }, |
| { |
| "epoch": 3.359030837004405, |
| "grad_norm": 0.8310422897338867, |
| "learning_rate": 1.7384615384615385e-05, |
| "loss": 0.3907, |
| "step": 3050 |
| }, |
| { |
| "epoch": 3.3865638766519823, |
| "grad_norm": 0.9818564057350159, |
| "learning_rate": 1.7358974358974362e-05, |
| "loss": 0.3515, |
| "step": 3075 |
| }, |
| { |
| "epoch": 3.4140969162995596, |
| "grad_norm": 0.8036455512046814, |
| "learning_rate": 1.7333333333333336e-05, |
| "loss": 0.4122, |
| "step": 3100 |
| }, |
| { |
| "epoch": 3.4416299559471364, |
| "grad_norm": 0.8108682632446289, |
| "learning_rate": 1.730769230769231e-05, |
| "loss": 0.3818, |
| "step": 3125 |
| }, |
| { |
| "epoch": 3.4691629955947136, |
| "grad_norm": 1.3358936309814453, |
| "learning_rate": 1.7282051282051283e-05, |
| "loss": 0.3533, |
| "step": 3150 |
| }, |
| { |
| "epoch": 3.496696035242291, |
| "grad_norm": 0.8960527777671814, |
| "learning_rate": 1.725641025641026e-05, |
| "loss": 0.3816, |
| "step": 3175 |
| }, |
| { |
| "epoch": 3.5242290748898677, |
| "grad_norm": 2.2398922443389893, |
| "learning_rate": 1.7230769230769234e-05, |
| "loss": 0.3855, |
| "step": 3200 |
| }, |
| { |
| "epoch": 3.551762114537445, |
| "grad_norm": 0.8993757367134094, |
| "learning_rate": 1.7205128205128207e-05, |
| "loss": 0.3503, |
| "step": 3225 |
| }, |
| { |
| "epoch": 3.579295154185022, |
| "grad_norm": 0.8796055912971497, |
| "learning_rate": 1.717948717948718e-05, |
| "loss": 0.3479, |
| "step": 3250 |
| }, |
| { |
| "epoch": 3.606828193832599, |
| "grad_norm": 0.7127211093902588, |
| "learning_rate": 1.7153846153846155e-05, |
| "loss": 0.3803, |
| "step": 3275 |
| }, |
| { |
| "epoch": 3.6343612334801763, |
| "grad_norm": 0.9272491335868835, |
| "learning_rate": 1.7128205128205128e-05, |
| "loss": 0.3819, |
| "step": 3300 |
| }, |
| { |
| "epoch": 3.6618942731277535, |
| "grad_norm": 0.6483042240142822, |
| "learning_rate": 1.7102564102564102e-05, |
| "loss": 0.3236, |
| "step": 3325 |
| }, |
| { |
| "epoch": 3.6894273127753303, |
| "grad_norm": 0.8154418468475342, |
| "learning_rate": 1.707692307692308e-05, |
| "loss": 0.3622, |
| "step": 3350 |
| }, |
| { |
| "epoch": 3.7169603524229076, |
| "grad_norm": 0.9721470475196838, |
| "learning_rate": 1.7051282051282053e-05, |
| "loss": 0.3215, |
| "step": 3375 |
| }, |
| { |
| "epoch": 3.744493392070485, |
| "grad_norm": 0.7829445004463196, |
| "learning_rate": 1.7025641025641026e-05, |
| "loss": 0.3652, |
| "step": 3400 |
| }, |
| { |
| "epoch": 3.7720264317180616, |
| "grad_norm": 0.8178221583366394, |
| "learning_rate": 1.7e-05, |
| "loss": 0.3522, |
| "step": 3425 |
| }, |
| { |
| "epoch": 3.799559471365639, |
| "grad_norm": 0.9240100979804993, |
| "learning_rate": 1.6974358974358977e-05, |
| "loss": 0.3553, |
| "step": 3450 |
| }, |
| { |
| "epoch": 3.827092511013216, |
| "grad_norm": 0.7727634906768799, |
| "learning_rate": 1.694871794871795e-05, |
| "loss": 0.3343, |
| "step": 3475 |
| }, |
| { |
| "epoch": 3.854625550660793, |
| "grad_norm": 0.8703585863113403, |
| "learning_rate": 1.6923076923076924e-05, |
| "loss": 0.4124, |
| "step": 3500 |
| }, |
| { |
| "epoch": 3.88215859030837, |
| "grad_norm": 0.6991782188415527, |
| "learning_rate": 1.6897435897435898e-05, |
| "loss": 0.3452, |
| "step": 3525 |
| }, |
| { |
| "epoch": 3.909691629955947, |
| "grad_norm": 0.8196091651916504, |
| "learning_rate": 1.687179487179487e-05, |
| "loss": 0.3681, |
| "step": 3550 |
| }, |
| { |
| "epoch": 3.9372246696035242, |
| "grad_norm": 0.7455742955207825, |
| "learning_rate": 1.684615384615385e-05, |
| "loss": 0.3963, |
| "step": 3575 |
| }, |
| { |
| "epoch": 3.964757709251101, |
| "grad_norm": 1.1615126132965088, |
| "learning_rate": 1.6820512820512822e-05, |
| "loss": 0.3811, |
| "step": 3600 |
| }, |
| { |
| "epoch": 3.9922907488986783, |
| "grad_norm": 1.1333736181259155, |
| "learning_rate": 1.6794871794871796e-05, |
| "loss": 0.3724, |
| "step": 3625 |
| }, |
| { |
| "epoch": 4.0198237885462555, |
| "grad_norm": 0.9983144998550415, |
| "learning_rate": 1.676923076923077e-05, |
| "loss": 0.2523, |
| "step": 3650 |
| }, |
| { |
| "epoch": 4.047356828193832, |
| "grad_norm": 0.8323332667350769, |
| "learning_rate": 1.6743589743589747e-05, |
| "loss": 0.24, |
| "step": 3675 |
| }, |
| { |
| "epoch": 4.07488986784141, |
| "grad_norm": 0.9976752400398254, |
| "learning_rate": 1.671794871794872e-05, |
| "loss": 0.2255, |
| "step": 3700 |
| }, |
| { |
| "epoch": 4.102422907488987, |
| "grad_norm": 0.56157386302948, |
| "learning_rate": 1.6692307692307694e-05, |
| "loss": 0.2547, |
| "step": 3725 |
| }, |
| { |
| "epoch": 4.129955947136564, |
| "grad_norm": 0.7160229086875916, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 0.2373, |
| "step": 3750 |
| }, |
| { |
| "epoch": 4.157488986784141, |
| "grad_norm": 0.88517165184021, |
| "learning_rate": 1.6641025641025645e-05, |
| "loss": 0.2279, |
| "step": 3775 |
| }, |
| { |
| "epoch": 4.185022026431718, |
| "grad_norm": 0.5281697511672974, |
| "learning_rate": 1.6615384615384618e-05, |
| "loss": 0.1878, |
| "step": 3800 |
| }, |
| { |
| "epoch": 4.212555066079295, |
| "grad_norm": 0.5709527134895325, |
| "learning_rate": 1.6589743589743592e-05, |
| "loss": 0.2223, |
| "step": 3825 |
| }, |
| { |
| "epoch": 4.240088105726873, |
| "grad_norm": 0.8601750135421753, |
| "learning_rate": 1.6564102564102565e-05, |
| "loss": 0.2166, |
| "step": 3850 |
| }, |
| { |
| "epoch": 4.2676211453744495, |
| "grad_norm": 0.5579825043678284, |
| "learning_rate": 1.653846153846154e-05, |
| "loss": 0.251, |
| "step": 3875 |
| }, |
| { |
| "epoch": 4.295154185022026, |
| "grad_norm": 0.5097801089286804, |
| "learning_rate": 1.6512820512820513e-05, |
| "loss": 0.2342, |
| "step": 3900 |
| }, |
| { |
| "epoch": 4.322687224669604, |
| "grad_norm": 0.6469748020172119, |
| "learning_rate": 1.6487179487179486e-05, |
| "loss": 0.2465, |
| "step": 3925 |
| }, |
| { |
| "epoch": 4.350220264317181, |
| "grad_norm": 0.7211927175521851, |
| "learning_rate": 1.6461538461538463e-05, |
| "loss": 0.2358, |
| "step": 3950 |
| }, |
| { |
| "epoch": 4.377753303964758, |
| "grad_norm": 0.6948506832122803, |
| "learning_rate": 1.6435897435897437e-05, |
| "loss": 0.2603, |
| "step": 3975 |
| }, |
| { |
| "epoch": 4.405286343612334, |
| "grad_norm": 0.7800878882408142, |
| "learning_rate": 1.641025641025641e-05, |
| "loss": 0.243, |
| "step": 4000 |
| }, |
| { |
| "epoch": 4.405286343612334, |
| "eval_cer": 26.809808214781107, |
| "eval_loss": 0.7649882435798645, |
| "eval_runtime": 1301.1909, |
| "eval_samples_per_second": 8.132, |
| "eval_steps_per_second": 2.034, |
| "eval_wer": 86.5912305516266, |
| "step": 4000 |
| }, |
| { |
| "epoch": 4.432819383259912, |
| "grad_norm": 0.6358670592308044, |
| "learning_rate": 1.6384615384615384e-05, |
| "loss": 0.2619, |
| "step": 4025 |
| }, |
| { |
| "epoch": 4.460352422907489, |
| "grad_norm": 0.7865478992462158, |
| "learning_rate": 1.635897435897436e-05, |
| "loss": 0.2671, |
| "step": 4050 |
| }, |
| { |
| "epoch": 4.487885462555066, |
| "grad_norm": 0.890876054763794, |
| "learning_rate": 1.6333333333333335e-05, |
| "loss": 0.2592, |
| "step": 4075 |
| }, |
| { |
| "epoch": 4.515418502202643, |
| "grad_norm": 0.7817333936691284, |
| "learning_rate": 1.630769230769231e-05, |
| "loss": 0.2229, |
| "step": 4100 |
| }, |
| { |
| "epoch": 4.54295154185022, |
| "grad_norm": 1.0560652017593384, |
| "learning_rate": 1.6282051282051282e-05, |
| "loss": 0.2499, |
| "step": 4125 |
| }, |
| { |
| "epoch": 4.570484581497797, |
| "grad_norm": 0.9233481884002686, |
| "learning_rate": 1.625641025641026e-05, |
| "loss": 0.2623, |
| "step": 4150 |
| }, |
| { |
| "epoch": 4.598017621145375, |
| "grad_norm": 0.7783260941505432, |
| "learning_rate": 1.6230769230769233e-05, |
| "loss": 0.2484, |
| "step": 4175 |
| }, |
| { |
| "epoch": 4.6255506607929515, |
| "grad_norm": 0.765825629234314, |
| "learning_rate": 1.6205128205128207e-05, |
| "loss": 0.2427, |
| "step": 4200 |
| }, |
| { |
| "epoch": 4.653083700440528, |
| "grad_norm": 1.1650230884552002, |
| "learning_rate": 1.617948717948718e-05, |
| "loss": 0.2443, |
| "step": 4225 |
| }, |
| { |
| "epoch": 4.680616740088106, |
| "grad_norm": 0.8231481909751892, |
| "learning_rate": 1.6153846153846154e-05, |
| "loss": 0.2316, |
| "step": 4250 |
| }, |
| { |
| "epoch": 4.708149779735683, |
| "grad_norm": 0.5373929738998413, |
| "learning_rate": 1.612820512820513e-05, |
| "loss": 0.2419, |
| "step": 4275 |
| }, |
| { |
| "epoch": 4.73568281938326, |
| "grad_norm": 0.7449804544448853, |
| "learning_rate": 1.6102564102564105e-05, |
| "loss": 0.2484, |
| "step": 4300 |
| }, |
| { |
| "epoch": 4.763215859030837, |
| "grad_norm": 0.889363706111908, |
| "learning_rate": 1.607692307692308e-05, |
| "loss": 0.245, |
| "step": 4325 |
| }, |
| { |
| "epoch": 4.790748898678414, |
| "grad_norm": 0.8369229435920715, |
| "learning_rate": 1.6051282051282052e-05, |
| "loss": 0.2394, |
| "step": 4350 |
| }, |
| { |
| "epoch": 4.818281938325991, |
| "grad_norm": 0.9110807180404663, |
| "learning_rate": 1.602564102564103e-05, |
| "loss": 0.2509, |
| "step": 4375 |
| }, |
| { |
| "epoch": 4.845814977973569, |
| "grad_norm": 0.4285617768764496, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.2244, |
| "step": 4400 |
| }, |
| { |
| "epoch": 4.8733480176211454, |
| "grad_norm": 0.6245120763778687, |
| "learning_rate": 1.5974358974358976e-05, |
| "loss": 0.2297, |
| "step": 4425 |
| }, |
| { |
| "epoch": 4.900881057268722, |
| "grad_norm": 0.5603160262107849, |
| "learning_rate": 1.594871794871795e-05, |
| "loss": 0.2383, |
| "step": 4450 |
| }, |
| { |
| "epoch": 4.9284140969163, |
| "grad_norm": 1.5242382287979126, |
| "learning_rate": 1.5923076923076924e-05, |
| "loss": 0.222, |
| "step": 4475 |
| }, |
| { |
| "epoch": 4.955947136563877, |
| "grad_norm": 0.4880862832069397, |
| "learning_rate": 1.5897435897435897e-05, |
| "loss": 0.2412, |
| "step": 4500 |
| }, |
| { |
| "epoch": 4.983480176211454, |
| "grad_norm": 0.8488364815711975, |
| "learning_rate": 1.587179487179487e-05, |
| "loss": 0.2459, |
| "step": 4525 |
| }, |
| { |
| "epoch": 5.011013215859031, |
| "grad_norm": 0.6367073059082031, |
| "learning_rate": 1.5846153846153848e-05, |
| "loss": 0.2024, |
| "step": 4550 |
| }, |
| { |
| "epoch": 5.038546255506608, |
| "grad_norm": 0.6037558913230896, |
| "learning_rate": 1.582051282051282e-05, |
| "loss": 0.183, |
| "step": 4575 |
| }, |
| { |
| "epoch": 5.066079295154185, |
| "grad_norm": 0.6983600854873657, |
| "learning_rate": 1.5794871794871795e-05, |
| "loss": 0.1353, |
| "step": 4600 |
| }, |
| { |
| "epoch": 5.093612334801762, |
| "grad_norm": 0.39246147871017456, |
| "learning_rate": 1.576923076923077e-05, |
| "loss": 0.1867, |
| "step": 4625 |
| }, |
| { |
| "epoch": 5.121145374449339, |
| "grad_norm": 0.6254175305366516, |
| "learning_rate": 1.5743589743589746e-05, |
| "loss": 0.1741, |
| "step": 4650 |
| }, |
| { |
| "epoch": 5.148678414096916, |
| "grad_norm": 0.3912099599838257, |
| "learning_rate": 1.571794871794872e-05, |
| "loss": 0.1744, |
| "step": 4675 |
| }, |
| { |
| "epoch": 5.176211453744493, |
| "grad_norm": 0.46106234192848206, |
| "learning_rate": 1.5692307692307693e-05, |
| "loss": 0.1437, |
| "step": 4700 |
| }, |
| { |
| "epoch": 5.203744493392071, |
| "grad_norm": 0.7368497252464294, |
| "learning_rate": 1.5666666666666667e-05, |
| "loss": 0.1606, |
| "step": 4725 |
| }, |
| { |
| "epoch": 5.2312775330396475, |
| "grad_norm": 0.9352337718009949, |
| "learning_rate": 1.5641025641025644e-05, |
| "loss": 0.1395, |
| "step": 4750 |
| }, |
| { |
| "epoch": 5.258810572687224, |
| "grad_norm": 0.785354733467102, |
| "learning_rate": 1.5615384615384618e-05, |
| "loss": 0.2019, |
| "step": 4775 |
| }, |
| { |
| "epoch": 5.286343612334802, |
| "grad_norm": 0.511448860168457, |
| "learning_rate": 1.558974358974359e-05, |
| "loss": 0.1484, |
| "step": 4800 |
| }, |
| { |
| "epoch": 5.313876651982379, |
| "grad_norm": 0.6866703033447266, |
| "learning_rate": 1.5564102564102565e-05, |
| "loss": 0.1413, |
| "step": 4825 |
| }, |
| { |
| "epoch": 5.341409691629956, |
| "grad_norm": 0.5879825949668884, |
| "learning_rate": 1.553846153846154e-05, |
| "loss": 0.1611, |
| "step": 4850 |
| }, |
| { |
| "epoch": 5.368942731277533, |
| "grad_norm": 0.5954814553260803, |
| "learning_rate": 1.5512820512820516e-05, |
| "loss": 0.1603, |
| "step": 4875 |
| }, |
| { |
| "epoch": 5.39647577092511, |
| "grad_norm": 0.6819676756858826, |
| "learning_rate": 1.548717948717949e-05, |
| "loss": 0.1458, |
| "step": 4900 |
| }, |
| { |
| "epoch": 5.424008810572687, |
| "grad_norm": 0.895698070526123, |
| "learning_rate": 1.5461538461538463e-05, |
| "loss": 0.2016, |
| "step": 4925 |
| }, |
| { |
| "epoch": 5.451541850220265, |
| "grad_norm": 0.6772642135620117, |
| "learning_rate": 1.5435897435897436e-05, |
| "loss": 0.1762, |
| "step": 4950 |
| }, |
| { |
| "epoch": 5.479074889867841, |
| "grad_norm": 0.49580076336860657, |
| "learning_rate": 1.5410256410256414e-05, |
| "loss": 0.1563, |
| "step": 4975 |
| }, |
| { |
| "epoch": 5.506607929515418, |
| "grad_norm": 0.5544493198394775, |
| "learning_rate": 1.5384615384615387e-05, |
| "loss": 0.1633, |
| "step": 5000 |
| }, |
| { |
| "epoch": 5.506607929515418, |
| "eval_cer": 27.411644675753443, |
| "eval_loss": 0.7654321789741516, |
| "eval_runtime": 1299.7877, |
| "eval_samples_per_second": 8.141, |
| "eval_steps_per_second": 2.036, |
| "eval_wer": 88.18481848184818, |
| "step": 5000 |
| }, |
| { |
| "epoch": 5.534140969162996, |
| "grad_norm": 0.5057178139686584, |
| "learning_rate": 1.535897435897436e-05, |
| "loss": 0.1553, |
| "step": 5025 |
| }, |
| { |
| "epoch": 5.561674008810573, |
| "grad_norm": 0.6811597347259521, |
| "learning_rate": 1.5333333333333334e-05, |
| "loss": 0.186, |
| "step": 5050 |
| }, |
| { |
| "epoch": 5.5892070484581495, |
| "grad_norm": 0.6146376132965088, |
| "learning_rate": 1.5307692307692308e-05, |
| "loss": 0.1658, |
| "step": 5075 |
| }, |
| { |
| "epoch": 5.616740088105727, |
| "grad_norm": 0.5837799310684204, |
| "learning_rate": 1.5282051282051282e-05, |
| "loss": 0.15, |
| "step": 5100 |
| }, |
| { |
| "epoch": 5.644273127753304, |
| "grad_norm": 0.8449403643608093, |
| "learning_rate": 1.5256410256410257e-05, |
| "loss": 0.1698, |
| "step": 5125 |
| }, |
| { |
| "epoch": 5.671806167400881, |
| "grad_norm": 0.5833331346511841, |
| "learning_rate": 1.523076923076923e-05, |
| "loss": 0.175, |
| "step": 5150 |
| }, |
| { |
| "epoch": 5.6993392070484585, |
| "grad_norm": 0.4615674912929535, |
| "learning_rate": 1.5205128205128206e-05, |
| "loss": 0.1374, |
| "step": 5175 |
| }, |
| { |
| "epoch": 5.726872246696035, |
| "grad_norm": 0.5180462598800659, |
| "learning_rate": 1.517948717948718e-05, |
| "loss": 0.2045, |
| "step": 5200 |
| }, |
| { |
| "epoch": 5.754405286343612, |
| "grad_norm": 0.4603562653064728, |
| "learning_rate": 1.5153846153846155e-05, |
| "loss": 0.1595, |
| "step": 5225 |
| }, |
| { |
| "epoch": 5.78193832599119, |
| "grad_norm": 0.7777943015098572, |
| "learning_rate": 1.5128205128205129e-05, |
| "loss": 0.1954, |
| "step": 5250 |
| }, |
| { |
| "epoch": 5.809471365638767, |
| "grad_norm": 0.4417853355407715, |
| "learning_rate": 1.5102564102564104e-05, |
| "loss": 0.1665, |
| "step": 5275 |
| }, |
| { |
| "epoch": 5.8370044052863435, |
| "grad_norm": 0.6847853660583496, |
| "learning_rate": 1.5076923076923078e-05, |
| "loss": 0.1629, |
| "step": 5300 |
| }, |
| { |
| "epoch": 5.864537444933921, |
| "grad_norm": 0.6507083177566528, |
| "learning_rate": 1.5051282051282053e-05, |
| "loss": 0.1677, |
| "step": 5325 |
| }, |
| { |
| "epoch": 5.892070484581498, |
| "grad_norm": 0.6313048005104065, |
| "learning_rate": 1.5025641025641027e-05, |
| "loss": 0.1863, |
| "step": 5350 |
| }, |
| { |
| "epoch": 5.919603524229075, |
| "grad_norm": 0.5796169638633728, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 0.1497, |
| "step": 5375 |
| }, |
| { |
| "epoch": 5.9471365638766525, |
| "grad_norm": 0.6254355907440186, |
| "learning_rate": 1.4974358974358976e-05, |
| "loss": 0.168, |
| "step": 5400 |
| }, |
| { |
| "epoch": 5.974669603524229, |
| "grad_norm": 0.742472767829895, |
| "learning_rate": 1.494871794871795e-05, |
| "loss": 0.1474, |
| "step": 5425 |
| }, |
| { |
| "epoch": 6.002202643171806, |
| "grad_norm": 0.45657020807266235, |
| "learning_rate": 1.4923076923076925e-05, |
| "loss": 0.1376, |
| "step": 5450 |
| }, |
| { |
| "epoch": 6.029735682819383, |
| "grad_norm": 0.49746203422546387, |
| "learning_rate": 1.4897435897435898e-05, |
| "loss": 0.1143, |
| "step": 5475 |
| }, |
| { |
| "epoch": 6.057268722466961, |
| "grad_norm": 0.41658642888069153, |
| "learning_rate": 1.4871794871794874e-05, |
| "loss": 0.0941, |
| "step": 5500 |
| }, |
| { |
| "epoch": 6.084801762114537, |
| "grad_norm": 0.5831480026245117, |
| "learning_rate": 1.4846153846153847e-05, |
| "loss": 0.1195, |
| "step": 5525 |
| }, |
| { |
| "epoch": 6.112334801762114, |
| "grad_norm": 0.4936138391494751, |
| "learning_rate": 1.4820512820512823e-05, |
| "loss": 0.1244, |
| "step": 5550 |
| }, |
| { |
| "epoch": 6.139867841409692, |
| "grad_norm": 0.5034681558609009, |
| "learning_rate": 1.4794871794871796e-05, |
| "loss": 0.1133, |
| "step": 5575 |
| }, |
| { |
| "epoch": 6.167400881057269, |
| "grad_norm": 0.6369169354438782, |
| "learning_rate": 1.4769230769230772e-05, |
| "loss": 0.1292, |
| "step": 5600 |
| }, |
| { |
| "epoch": 6.1949339207048455, |
| "grad_norm": 0.3793521225452423, |
| "learning_rate": 1.4743589743589745e-05, |
| "loss": 0.0927, |
| "step": 5625 |
| }, |
| { |
| "epoch": 6.222466960352423, |
| "grad_norm": 0.6850873827934265, |
| "learning_rate": 1.471794871794872e-05, |
| "loss": 0.1249, |
| "step": 5650 |
| }, |
| { |
| "epoch": 6.25, |
| "grad_norm": 0.5474864840507507, |
| "learning_rate": 1.4692307692307694e-05, |
| "loss": 0.1206, |
| "step": 5675 |
| }, |
| { |
| "epoch": 6.277533039647577, |
| "grad_norm": 0.2814996838569641, |
| "learning_rate": 1.4666666666666666e-05, |
| "loss": 0.0832, |
| "step": 5700 |
| }, |
| { |
| "epoch": 6.3050660792951545, |
| "grad_norm": 0.3831164538860321, |
| "learning_rate": 1.4641025641025642e-05, |
| "loss": 0.1137, |
| "step": 5725 |
| }, |
| { |
| "epoch": 6.332599118942731, |
| "grad_norm": 0.48341113328933716, |
| "learning_rate": 1.4615384615384615e-05, |
| "loss": 0.096, |
| "step": 5750 |
| }, |
| { |
| "epoch": 6.360132158590308, |
| "grad_norm": 0.4037848711013794, |
| "learning_rate": 1.458974358974359e-05, |
| "loss": 0.096, |
| "step": 5775 |
| }, |
| { |
| "epoch": 6.387665198237886, |
| "grad_norm": 0.5997889637947083, |
| "learning_rate": 1.4564102564102564e-05, |
| "loss": 0.1143, |
| "step": 5800 |
| }, |
| { |
| "epoch": 6.415198237885463, |
| "grad_norm": 0.6279116272926331, |
| "learning_rate": 1.453846153846154e-05, |
| "loss": 0.102, |
| "step": 5825 |
| }, |
| { |
| "epoch": 6.442731277533039, |
| "grad_norm": 0.5346375703811646, |
| "learning_rate": 1.4512820512820513e-05, |
| "loss": 0.1336, |
| "step": 5850 |
| }, |
| { |
| "epoch": 6.470264317180617, |
| "grad_norm": 0.5246495008468628, |
| "learning_rate": 1.4487179487179489e-05, |
| "loss": 0.1357, |
| "step": 5875 |
| }, |
| { |
| "epoch": 6.497797356828194, |
| "grad_norm": 0.4948612153530121, |
| "learning_rate": 1.4461538461538462e-05, |
| "loss": 0.1229, |
| "step": 5900 |
| }, |
| { |
| "epoch": 6.525330396475771, |
| "grad_norm": 0.6798549294471741, |
| "learning_rate": 1.4435897435897438e-05, |
| "loss": 0.1319, |
| "step": 5925 |
| }, |
| { |
| "epoch": 6.5528634361233475, |
| "grad_norm": 0.26407966017723083, |
| "learning_rate": 1.4410256410256411e-05, |
| "loss": 0.1187, |
| "step": 5950 |
| }, |
| { |
| "epoch": 6.580396475770925, |
| "grad_norm": 0.7821138501167297, |
| "learning_rate": 1.4384615384615387e-05, |
| "loss": 0.1218, |
| "step": 5975 |
| }, |
| { |
| "epoch": 6.607929515418502, |
| "grad_norm": 0.5022340416908264, |
| "learning_rate": 1.435897435897436e-05, |
| "loss": 0.1113, |
| "step": 6000 |
| }, |
| { |
| "epoch": 6.607929515418502, |
| "eval_cer": 26.76395400823083, |
| "eval_loss": 0.7905788421630859, |
| "eval_runtime": 1296.6476, |
| "eval_samples_per_second": 8.16, |
| "eval_steps_per_second": 2.041, |
| "eval_wer": 86.42149929278642, |
| "step": 6000 |
| }, |
| { |
| "epoch": 6.635462555066079, |
| "grad_norm": 0.55087810754776, |
| "learning_rate": 1.4333333333333334e-05, |
| "loss": 0.0871, |
| "step": 6025 |
| }, |
| { |
| "epoch": 6.6629955947136565, |
| "grad_norm": 0.2666168510913849, |
| "learning_rate": 1.430769230769231e-05, |
| "loss": 0.1377, |
| "step": 6050 |
| }, |
| { |
| "epoch": 6.690528634361233, |
| "grad_norm": 0.6376879811286926, |
| "learning_rate": 1.4282051282051283e-05, |
| "loss": 0.1371, |
| "step": 6075 |
| }, |
| { |
| "epoch": 6.71806167400881, |
| "grad_norm": 0.6430523991584778, |
| "learning_rate": 1.4256410256410258e-05, |
| "loss": 0.1426, |
| "step": 6100 |
| }, |
| { |
| "epoch": 6.745594713656388, |
| "grad_norm": 0.4945014715194702, |
| "learning_rate": 1.4230769230769232e-05, |
| "loss": 0.1288, |
| "step": 6125 |
| }, |
| { |
| "epoch": 6.773127753303965, |
| "grad_norm": 0.7737772464752197, |
| "learning_rate": 1.4205128205128207e-05, |
| "loss": 0.1124, |
| "step": 6150 |
| }, |
| { |
| "epoch": 6.8006607929515415, |
| "grad_norm": 0.6132165789604187, |
| "learning_rate": 1.4179487179487181e-05, |
| "loss": 0.1234, |
| "step": 6175 |
| }, |
| { |
| "epoch": 6.828193832599119, |
| "grad_norm": 0.3980983793735504, |
| "learning_rate": 1.4153846153846156e-05, |
| "loss": 0.1079, |
| "step": 6200 |
| }, |
| { |
| "epoch": 6.855726872246696, |
| "grad_norm": 0.583961009979248, |
| "learning_rate": 1.412820512820513e-05, |
| "loss": 0.1378, |
| "step": 6225 |
| }, |
| { |
| "epoch": 6.883259911894273, |
| "grad_norm": 0.37798672914505005, |
| "learning_rate": 1.4102564102564105e-05, |
| "loss": 0.1087, |
| "step": 6250 |
| }, |
| { |
| "epoch": 6.9107929515418505, |
| "grad_norm": 0.4246102571487427, |
| "learning_rate": 1.4076923076923079e-05, |
| "loss": 0.0986, |
| "step": 6275 |
| }, |
| { |
| "epoch": 6.938325991189427, |
| "grad_norm": 0.5898611545562744, |
| "learning_rate": 1.405128205128205e-05, |
| "loss": 0.1116, |
| "step": 6300 |
| }, |
| { |
| "epoch": 6.965859030837004, |
| "grad_norm": 0.6355592608451843, |
| "learning_rate": 1.4025641025641026e-05, |
| "loss": 0.1243, |
| "step": 6325 |
| }, |
| { |
| "epoch": 6.993392070484582, |
| "grad_norm": 0.7065783739089966, |
| "learning_rate": 1.4e-05, |
| "loss": 0.1175, |
| "step": 6350 |
| }, |
| { |
| "epoch": 7.020925110132159, |
| "grad_norm": 0.2747715711593628, |
| "learning_rate": 1.3974358974358975e-05, |
| "loss": 0.0794, |
| "step": 6375 |
| }, |
| { |
| "epoch": 7.048458149779735, |
| "grad_norm": 0.4818078875541687, |
| "learning_rate": 1.3948717948717949e-05, |
| "loss": 0.0762, |
| "step": 6400 |
| }, |
| { |
| "epoch": 7.075991189427313, |
| "grad_norm": 0.32405680418014526, |
| "learning_rate": 1.3923076923076924e-05, |
| "loss": 0.0672, |
| "step": 6425 |
| }, |
| { |
| "epoch": 7.10352422907489, |
| "grad_norm": 0.43055686354637146, |
| "learning_rate": 1.3897435897435898e-05, |
| "loss": 0.0608, |
| "step": 6450 |
| }, |
| { |
| "epoch": 7.131057268722467, |
| "grad_norm": 0.18856699764728546, |
| "learning_rate": 1.3871794871794873e-05, |
| "loss": 0.0737, |
| "step": 6475 |
| }, |
| { |
| "epoch": 7.158590308370044, |
| "grad_norm": 0.3173889219760895, |
| "learning_rate": 1.3846153846153847e-05, |
| "loss": 0.082, |
| "step": 6500 |
| }, |
| { |
| "epoch": 7.186123348017621, |
| "grad_norm": 0.6276409029960632, |
| "learning_rate": 1.3820512820512822e-05, |
| "loss": 0.0711, |
| "step": 6525 |
| }, |
| { |
| "epoch": 7.213656387665198, |
| "grad_norm": 0.6348710656166077, |
| "learning_rate": 1.3794871794871796e-05, |
| "loss": 0.0702, |
| "step": 6550 |
| }, |
| { |
| "epoch": 7.241189427312776, |
| "grad_norm": 0.22085753083229065, |
| "learning_rate": 1.3769230769230771e-05, |
| "loss": 0.08, |
| "step": 6575 |
| }, |
| { |
| "epoch": 7.2687224669603525, |
| "grad_norm": 0.5453472137451172, |
| "learning_rate": 1.3743589743589745e-05, |
| "loss": 0.0747, |
| "step": 6600 |
| }, |
| { |
| "epoch": 7.296255506607929, |
| "grad_norm": 0.31948912143707275, |
| "learning_rate": 1.3717948717948718e-05, |
| "loss": 0.091, |
| "step": 6625 |
| }, |
| { |
| "epoch": 7.323788546255507, |
| "grad_norm": 0.3135238289833069, |
| "learning_rate": 1.3692307692307694e-05, |
| "loss": 0.0786, |
| "step": 6650 |
| }, |
| { |
| "epoch": 7.351321585903084, |
| "grad_norm": 0.5533322691917419, |
| "learning_rate": 1.3666666666666667e-05, |
| "loss": 0.0864, |
| "step": 6675 |
| }, |
| { |
| "epoch": 7.378854625550661, |
| "grad_norm": 0.38217464089393616, |
| "learning_rate": 1.3641025641025643e-05, |
| "loss": 0.0634, |
| "step": 6700 |
| }, |
| { |
| "epoch": 7.406387665198238, |
| "grad_norm": 0.16825051605701447, |
| "learning_rate": 1.3615384615384616e-05, |
| "loss": 0.0601, |
| "step": 6725 |
| }, |
| { |
| "epoch": 7.433920704845815, |
| "grad_norm": 0.4651043117046356, |
| "learning_rate": 1.3589743589743592e-05, |
| "loss": 0.0747, |
| "step": 6750 |
| }, |
| { |
| "epoch": 7.461453744493392, |
| "grad_norm": 0.28918442130088806, |
| "learning_rate": 1.3564102564102565e-05, |
| "loss": 0.086, |
| "step": 6775 |
| }, |
| { |
| "epoch": 7.48898678414097, |
| "grad_norm": 0.28500112891197205, |
| "learning_rate": 1.353846153846154e-05, |
| "loss": 0.0773, |
| "step": 6800 |
| }, |
| { |
| "epoch": 7.516519823788546, |
| "grad_norm": 0.4884459376335144, |
| "learning_rate": 1.3512820512820514e-05, |
| "loss": 0.0738, |
| "step": 6825 |
| }, |
| { |
| "epoch": 7.544052863436123, |
| "grad_norm": 0.436380535364151, |
| "learning_rate": 1.348717948717949e-05, |
| "loss": 0.0736, |
| "step": 6850 |
| }, |
| { |
| "epoch": 7.5715859030837, |
| "grad_norm": 0.6121215224266052, |
| "learning_rate": 1.3461538461538463e-05, |
| "loss": 0.0732, |
| "step": 6875 |
| }, |
| { |
| "epoch": 7.599118942731278, |
| "grad_norm": 0.42172011733055115, |
| "learning_rate": 1.3435897435897435e-05, |
| "loss": 0.0947, |
| "step": 6900 |
| }, |
| { |
| "epoch": 7.6266519823788546, |
| "grad_norm": 0.423505961894989, |
| "learning_rate": 1.341025641025641e-05, |
| "loss": 0.0704, |
| "step": 6925 |
| }, |
| { |
| "epoch": 7.654185022026431, |
| "grad_norm": 0.5993038415908813, |
| "learning_rate": 1.3384615384615384e-05, |
| "loss": 0.1009, |
| "step": 6950 |
| }, |
| { |
| "epoch": 7.681718061674009, |
| "grad_norm": 0.5881207585334778, |
| "learning_rate": 1.335897435897436e-05, |
| "loss": 0.0912, |
| "step": 6975 |
| }, |
| { |
| "epoch": 7.709251101321586, |
| "grad_norm": 0.5848076939582825, |
| "learning_rate": 1.3333333333333333e-05, |
| "loss": 0.0673, |
| "step": 7000 |
| }, |
| { |
| "epoch": 7.709251101321586, |
| "eval_cer": 27.13709261403368, |
| "eval_loss": 0.7989184260368347, |
| "eval_runtime": 1306.8838, |
| "eval_samples_per_second": 8.096, |
| "eval_steps_per_second": 2.025, |
| "eval_wer": 84.61103253182462, |
| "step": 7000 |
| }, |
| { |
| "epoch": 7.736784140969163, |
| "grad_norm": 0.8075867295265198, |
| "learning_rate": 1.3307692307692309e-05, |
| "loss": 0.0946, |
| "step": 7025 |
| }, |
| { |
| "epoch": 7.76431718061674, |
| "grad_norm": 0.6053417325019836, |
| "learning_rate": 1.3282051282051282e-05, |
| "loss": 0.0816, |
| "step": 7050 |
| }, |
| { |
| "epoch": 7.791850220264317, |
| "grad_norm": 0.6305066347122192, |
| "learning_rate": 1.3256410256410258e-05, |
| "loss": 0.079, |
| "step": 7075 |
| }, |
| { |
| "epoch": 7.819383259911894, |
| "grad_norm": 0.5551367998123169, |
| "learning_rate": 1.3230769230769231e-05, |
| "loss": 0.0743, |
| "step": 7100 |
| }, |
| { |
| "epoch": 7.846916299559472, |
| "grad_norm": 0.362006276845932, |
| "learning_rate": 1.3205128205128207e-05, |
| "loss": 0.0694, |
| "step": 7125 |
| }, |
| { |
| "epoch": 7.8744493392070485, |
| "grad_norm": 0.3232136368751526, |
| "learning_rate": 1.317948717948718e-05, |
| "loss": 0.0947, |
| "step": 7150 |
| }, |
| { |
| "epoch": 7.901982378854625, |
| "grad_norm": 0.23883138597011566, |
| "learning_rate": 1.3153846153846156e-05, |
| "loss": 0.0896, |
| "step": 7175 |
| }, |
| { |
| "epoch": 7.929515418502203, |
| "grad_norm": 0.49950769543647766, |
| "learning_rate": 1.312820512820513e-05, |
| "loss": 0.1053, |
| "step": 7200 |
| }, |
| { |
| "epoch": 7.95704845814978, |
| "grad_norm": 0.5408104658126831, |
| "learning_rate": 1.3102564102564103e-05, |
| "loss": 0.0903, |
| "step": 7225 |
| }, |
| { |
| "epoch": 7.984581497797357, |
| "grad_norm": 0.35938769578933716, |
| "learning_rate": 1.3076923076923078e-05, |
| "loss": 0.0908, |
| "step": 7250 |
| }, |
| { |
| "epoch": 8.012114537444933, |
| "grad_norm": 0.29418617486953735, |
| "learning_rate": 1.3051282051282052e-05, |
| "loss": 0.0998, |
| "step": 7275 |
| }, |
| { |
| "epoch": 8.039647577092511, |
| "grad_norm": 0.39860227704048157, |
| "learning_rate": 1.3025641025641027e-05, |
| "loss": 0.0574, |
| "step": 7300 |
| }, |
| { |
| "epoch": 8.067180616740089, |
| "grad_norm": 0.34400638937950134, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 0.0556, |
| "step": 7325 |
| }, |
| { |
| "epoch": 8.094713656387665, |
| "grad_norm": 0.39651989936828613, |
| "learning_rate": 1.2974358974358976e-05, |
| "loss": 0.0561, |
| "step": 7350 |
| }, |
| { |
| "epoch": 8.122246696035242, |
| "grad_norm": 0.43096983432769775, |
| "learning_rate": 1.294871794871795e-05, |
| "loss": 0.0521, |
| "step": 7375 |
| }, |
| { |
| "epoch": 8.14977973568282, |
| "grad_norm": 0.2908264994621277, |
| "learning_rate": 1.2923076923076925e-05, |
| "loss": 0.052, |
| "step": 7400 |
| }, |
| { |
| "epoch": 8.177312775330396, |
| "grad_norm": 0.1850954294204712, |
| "learning_rate": 1.2897435897435899e-05, |
| "loss": 0.0537, |
| "step": 7425 |
| }, |
| { |
| "epoch": 8.204845814977974, |
| "grad_norm": 0.11924485862255096, |
| "learning_rate": 1.2871794871794874e-05, |
| "loss": 0.0436, |
| "step": 7450 |
| }, |
| { |
| "epoch": 8.232378854625551, |
| "grad_norm": 0.9369707703590393, |
| "learning_rate": 1.2846153846153848e-05, |
| "loss": 0.0616, |
| "step": 7475 |
| }, |
| { |
| "epoch": 8.259911894273127, |
| "grad_norm": 0.3720383644104004, |
| "learning_rate": 1.2820512820512823e-05, |
| "loss": 0.0483, |
| "step": 7500 |
| }, |
| { |
| "epoch": 8.287444933920705, |
| "grad_norm": 0.5974137187004089, |
| "learning_rate": 1.2794871794871795e-05, |
| "loss": 0.0469, |
| "step": 7525 |
| }, |
| { |
| "epoch": 8.314977973568283, |
| "grad_norm": 0.3387705981731415, |
| "learning_rate": 1.2769230769230769e-05, |
| "loss": 0.0512, |
| "step": 7550 |
| }, |
| { |
| "epoch": 8.342511013215859, |
| "grad_norm": 0.5228244066238403, |
| "learning_rate": 1.2743589743589744e-05, |
| "loss": 0.0542, |
| "step": 7575 |
| }, |
| { |
| "epoch": 8.370044052863436, |
| "grad_norm": 0.6965579390525818, |
| "learning_rate": 1.2717948717948718e-05, |
| "loss": 0.0499, |
| "step": 7600 |
| }, |
| { |
| "epoch": 8.397577092511014, |
| "grad_norm": 0.5114963054656982, |
| "learning_rate": 1.2692307692307693e-05, |
| "loss": 0.0641, |
| "step": 7625 |
| }, |
| { |
| "epoch": 8.42511013215859, |
| "grad_norm": 0.17714178562164307, |
| "learning_rate": 1.2666666666666667e-05, |
| "loss": 0.0474, |
| "step": 7650 |
| }, |
| { |
| "epoch": 8.452643171806168, |
| "grad_norm": 0.9684805274009705, |
| "learning_rate": 1.2641025641025642e-05, |
| "loss": 0.0557, |
| "step": 7675 |
| }, |
| { |
| "epoch": 8.480176211453745, |
| "grad_norm": 0.20210257172584534, |
| "learning_rate": 1.2615384615384616e-05, |
| "loss": 0.0548, |
| "step": 7700 |
| }, |
| { |
| "epoch": 8.507709251101321, |
| "grad_norm": 0.27927425503730774, |
| "learning_rate": 1.2589743589743591e-05, |
| "loss": 0.0523, |
| "step": 7725 |
| }, |
| { |
| "epoch": 8.535242290748899, |
| "grad_norm": 0.5390238761901855, |
| "learning_rate": 1.2564102564102565e-05, |
| "loss": 0.0693, |
| "step": 7750 |
| }, |
| { |
| "epoch": 8.562775330396477, |
| "grad_norm": 0.4304569363594055, |
| "learning_rate": 1.253846153846154e-05, |
| "loss": 0.0613, |
| "step": 7775 |
| }, |
| { |
| "epoch": 8.590308370044053, |
| "grad_norm": 0.5010227560997009, |
| "learning_rate": 1.2512820512820514e-05, |
| "loss": 0.0637, |
| "step": 7800 |
| }, |
| { |
| "epoch": 8.61784140969163, |
| "grad_norm": 0.39493897557258606, |
| "learning_rate": 1.2487179487179487e-05, |
| "loss": 0.0674, |
| "step": 7825 |
| }, |
| { |
| "epoch": 8.645374449339208, |
| "grad_norm": 0.3617560565471649, |
| "learning_rate": 1.2461538461538463e-05, |
| "loss": 0.0552, |
| "step": 7850 |
| }, |
| { |
| "epoch": 8.672907488986784, |
| "grad_norm": 0.9049015045166016, |
| "learning_rate": 1.2435897435897436e-05, |
| "loss": 0.0424, |
| "step": 7875 |
| }, |
| { |
| "epoch": 8.700440528634362, |
| "grad_norm": 0.687857449054718, |
| "learning_rate": 1.2410256410256412e-05, |
| "loss": 0.0656, |
| "step": 7900 |
| }, |
| { |
| "epoch": 8.72797356828194, |
| "grad_norm": 0.525587260723114, |
| "learning_rate": 1.2384615384615385e-05, |
| "loss": 0.0655, |
| "step": 7925 |
| }, |
| { |
| "epoch": 8.755506607929515, |
| "grad_norm": 0.4091179072856903, |
| "learning_rate": 1.235897435897436e-05, |
| "loss": 0.066, |
| "step": 7950 |
| }, |
| { |
| "epoch": 8.783039647577093, |
| "grad_norm": 0.32147249579429626, |
| "learning_rate": 1.2333333333333334e-05, |
| "loss": 0.0583, |
| "step": 7975 |
| }, |
| { |
| "epoch": 8.810572687224669, |
| "grad_norm": 0.47001683712005615, |
| "learning_rate": 1.230769230769231e-05, |
| "loss": 0.0624, |
| "step": 8000 |
| }, |
| { |
| "epoch": 8.810572687224669, |
| "eval_cer": 24.95328602707691, |
| "eval_loss": 0.8190127015113831, |
| "eval_runtime": 1298.6148, |
| "eval_samples_per_second": 8.148, |
| "eval_steps_per_second": 2.038, |
| "eval_wer": 84.65818010372466, |
| "step": 8000 |
| }, |
| { |
| "epoch": 8.838105726872246, |
| "grad_norm": 0.4862002730369568, |
| "learning_rate": 1.2282051282051283e-05, |
| "loss": 0.0534, |
| "step": 8025 |
| }, |
| { |
| "epoch": 8.865638766519824, |
| "grad_norm": 0.4833989143371582, |
| "learning_rate": 1.2256410256410259e-05, |
| "loss": 0.0582, |
| "step": 8050 |
| }, |
| { |
| "epoch": 8.8931718061674, |
| "grad_norm": 0.3487415611743927, |
| "learning_rate": 1.2230769230769232e-05, |
| "loss": 0.0569, |
| "step": 8075 |
| }, |
| { |
| "epoch": 8.920704845814978, |
| "grad_norm": 0.22568881511688232, |
| "learning_rate": 1.2205128205128208e-05, |
| "loss": 0.0668, |
| "step": 8100 |
| }, |
| { |
| "epoch": 8.948237885462555, |
| "grad_norm": 0.47726893424987793, |
| "learning_rate": 1.217948717948718e-05, |
| "loss": 0.0518, |
| "step": 8125 |
| }, |
| { |
| "epoch": 8.975770925110131, |
| "grad_norm": 0.4065573513507843, |
| "learning_rate": 1.2153846153846153e-05, |
| "loss": 0.0406, |
| "step": 8150 |
| }, |
| { |
| "epoch": 9.003303964757709, |
| "grad_norm": 0.2732442021369934, |
| "learning_rate": 1.2128205128205129e-05, |
| "loss": 0.0624, |
| "step": 8175 |
| }, |
| { |
| "epoch": 9.030837004405287, |
| "grad_norm": 0.4044542610645294, |
| "learning_rate": 1.2102564102564102e-05, |
| "loss": 0.0338, |
| "step": 8200 |
| }, |
| { |
| "epoch": 9.058370044052863, |
| "grad_norm": 0.12826256453990936, |
| "learning_rate": 1.2076923076923078e-05, |
| "loss": 0.0321, |
| "step": 8225 |
| }, |
| { |
| "epoch": 9.08590308370044, |
| "grad_norm": 0.3568229377269745, |
| "learning_rate": 1.2051282051282051e-05, |
| "loss": 0.0285, |
| "step": 8250 |
| }, |
| { |
| "epoch": 9.113436123348018, |
| "grad_norm": 0.37862274050712585, |
| "learning_rate": 1.2025641025641027e-05, |
| "loss": 0.0343, |
| "step": 8275 |
| }, |
| { |
| "epoch": 9.140969162995594, |
| "grad_norm": 0.2104802429676056, |
| "learning_rate": 1.2e-05, |
| "loss": 0.0384, |
| "step": 8300 |
| }, |
| { |
| "epoch": 9.168502202643172, |
| "grad_norm": 0.2630591094493866, |
| "learning_rate": 1.1974358974358976e-05, |
| "loss": 0.0266, |
| "step": 8325 |
| }, |
| { |
| "epoch": 9.19603524229075, |
| "grad_norm": 0.40195232629776, |
| "learning_rate": 1.194871794871795e-05, |
| "loss": 0.038, |
| "step": 8350 |
| }, |
| { |
| "epoch": 9.223568281938325, |
| "grad_norm": 0.23489591479301453, |
| "learning_rate": 1.1923076923076925e-05, |
| "loss": 0.0301, |
| "step": 8375 |
| }, |
| { |
| "epoch": 9.251101321585903, |
| "grad_norm": 0.24858242273330688, |
| "learning_rate": 1.1897435897435898e-05, |
| "loss": 0.0313, |
| "step": 8400 |
| }, |
| { |
| "epoch": 9.27863436123348, |
| "grad_norm": 0.3414604067802429, |
| "learning_rate": 1.1871794871794872e-05, |
| "loss": 0.0293, |
| "step": 8425 |
| }, |
| { |
| "epoch": 9.306167400881057, |
| "grad_norm": 0.2159339338541031, |
| "learning_rate": 1.1846153846153847e-05, |
| "loss": 0.0365, |
| "step": 8450 |
| }, |
| { |
| "epoch": 9.333700440528634, |
| "grad_norm": 0.2686537206172943, |
| "learning_rate": 1.1820512820512821e-05, |
| "loss": 0.0313, |
| "step": 8475 |
| }, |
| { |
| "epoch": 9.361233480176212, |
| "grad_norm": 0.2898625433444977, |
| "learning_rate": 1.1794871794871796e-05, |
| "loss": 0.0414, |
| "step": 8500 |
| }, |
| { |
| "epoch": 9.388766519823788, |
| "grad_norm": 0.5631348490715027, |
| "learning_rate": 1.176923076923077e-05, |
| "loss": 0.0354, |
| "step": 8525 |
| }, |
| { |
| "epoch": 9.416299559471366, |
| "grad_norm": 0.19131043553352356, |
| "learning_rate": 1.1743589743589745e-05, |
| "loss": 0.0409, |
| "step": 8550 |
| }, |
| { |
| "epoch": 9.443832599118943, |
| "grad_norm": 0.37251201272010803, |
| "learning_rate": 1.1717948717948719e-05, |
| "loss": 0.0342, |
| "step": 8575 |
| }, |
| { |
| "epoch": 9.47136563876652, |
| "grad_norm": 0.3231589198112488, |
| "learning_rate": 1.1692307692307694e-05, |
| "loss": 0.0336, |
| "step": 8600 |
| }, |
| { |
| "epoch": 9.498898678414097, |
| "grad_norm": 0.6271162033081055, |
| "learning_rate": 1.1666666666666668e-05, |
| "loss": 0.0459, |
| "step": 8625 |
| }, |
| { |
| "epoch": 9.526431718061675, |
| "grad_norm": 0.11155181378126144, |
| "learning_rate": 1.1641025641025643e-05, |
| "loss": 0.0446, |
| "step": 8650 |
| }, |
| { |
| "epoch": 9.55396475770925, |
| "grad_norm": 0.2658681869506836, |
| "learning_rate": 1.1615384615384617e-05, |
| "loss": 0.0352, |
| "step": 8675 |
| }, |
| { |
| "epoch": 9.581497797356828, |
| "grad_norm": 0.3016481399536133, |
| "learning_rate": 1.1589743589743592e-05, |
| "loss": 0.0267, |
| "step": 8700 |
| }, |
| { |
| "epoch": 9.609030837004406, |
| "grad_norm": 0.1538410484790802, |
| "learning_rate": 1.1564102564102566e-05, |
| "loss": 0.0431, |
| "step": 8725 |
| }, |
| { |
| "epoch": 9.636563876651982, |
| "grad_norm": 0.28318852186203003, |
| "learning_rate": 1.1538461538461538e-05, |
| "loss": 0.0439, |
| "step": 8750 |
| }, |
| { |
| "epoch": 9.66409691629956, |
| "grad_norm": 0.43803560733795166, |
| "learning_rate": 1.1512820512820513e-05, |
| "loss": 0.0329, |
| "step": 8775 |
| }, |
| { |
| "epoch": 9.691629955947137, |
| "grad_norm": 0.2602671980857849, |
| "learning_rate": 1.1487179487179487e-05, |
| "loss": 0.0293, |
| "step": 8800 |
| }, |
| { |
| "epoch": 9.719162995594713, |
| "grad_norm": 0.3581433892250061, |
| "learning_rate": 1.1461538461538462e-05, |
| "loss": 0.0343, |
| "step": 8825 |
| }, |
| { |
| "epoch": 9.746696035242291, |
| "grad_norm": 0.37504616379737854, |
| "learning_rate": 1.1435897435897436e-05, |
| "loss": 0.0344, |
| "step": 8850 |
| }, |
| { |
| "epoch": 9.774229074889869, |
| "grad_norm": 0.2697007954120636, |
| "learning_rate": 1.1410256410256411e-05, |
| "loss": 0.0416, |
| "step": 8875 |
| }, |
| { |
| "epoch": 9.801762114537445, |
| "grad_norm": 0.4063343107700348, |
| "learning_rate": 1.1384615384615385e-05, |
| "loss": 0.0331, |
| "step": 8900 |
| }, |
| { |
| "epoch": 9.829295154185022, |
| "grad_norm": 0.24234430491924286, |
| "learning_rate": 1.135897435897436e-05, |
| "loss": 0.0519, |
| "step": 8925 |
| }, |
| { |
| "epoch": 9.8568281938326, |
| "grad_norm": 0.539232075214386, |
| "learning_rate": 1.1333333333333334e-05, |
| "loss": 0.0475, |
| "step": 8950 |
| }, |
| { |
| "epoch": 9.884361233480176, |
| "grad_norm": 0.37743422389030457, |
| "learning_rate": 1.1307692307692309e-05, |
| "loss": 0.0328, |
| "step": 8975 |
| }, |
| { |
| "epoch": 9.911894273127754, |
| "grad_norm": 0.32006239891052246, |
| "learning_rate": 1.1282051282051283e-05, |
| "loss": 0.0355, |
| "step": 9000 |
| }, |
| { |
| "epoch": 9.911894273127754, |
| "eval_cer": 24.863870324303875, |
| "eval_loss": 0.8439357280731201, |
| "eval_runtime": 1305.3411, |
| "eval_samples_per_second": 8.106, |
| "eval_steps_per_second": 2.027, |
| "eval_wer": 84.0924092409241, |
| "step": 9000 |
| }, |
| { |
| "epoch": 9.939427312775331, |
| "grad_norm": 0.22994668781757355, |
| "learning_rate": 1.1256410256410258e-05, |
| "loss": 0.0382, |
| "step": 9025 |
| }, |
| { |
| "epoch": 9.966960352422907, |
| "grad_norm": 0.6525270938873291, |
| "learning_rate": 1.1230769230769232e-05, |
| "loss": 0.0414, |
| "step": 9050 |
| }, |
| { |
| "epoch": 9.994493392070485, |
| "grad_norm": 0.37982580065727234, |
| "learning_rate": 1.1205128205128205e-05, |
| "loss": 0.0378, |
| "step": 9075 |
| }, |
| { |
| "epoch": 10.022026431718063, |
| "grad_norm": 0.4457830786705017, |
| "learning_rate": 1.117948717948718e-05, |
| "loss": 0.026, |
| "step": 9100 |
| }, |
| { |
| "epoch": 10.049559471365638, |
| "grad_norm": 0.36874255537986755, |
| "learning_rate": 1.1153846153846154e-05, |
| "loss": 0.0235, |
| "step": 9125 |
| }, |
| { |
| "epoch": 10.077092511013216, |
| "grad_norm": 0.33007436990737915, |
| "learning_rate": 1.112820512820513e-05, |
| "loss": 0.0276, |
| "step": 9150 |
| }, |
| { |
| "epoch": 10.104625550660794, |
| "grad_norm": 0.6235056519508362, |
| "learning_rate": 1.1102564102564103e-05, |
| "loss": 0.0198, |
| "step": 9175 |
| }, |
| { |
| "epoch": 10.13215859030837, |
| "grad_norm": 0.3338548243045807, |
| "learning_rate": 1.1076923076923079e-05, |
| "loss": 0.0219, |
| "step": 9200 |
| }, |
| { |
| "epoch": 10.159691629955947, |
| "grad_norm": 0.21556895971298218, |
| "learning_rate": 1.1051282051282052e-05, |
| "loss": 0.021, |
| "step": 9225 |
| }, |
| { |
| "epoch": 10.187224669603523, |
| "grad_norm": 0.22250282764434814, |
| "learning_rate": 1.1025641025641028e-05, |
| "loss": 0.029, |
| "step": 9250 |
| }, |
| { |
| "epoch": 10.214757709251101, |
| "grad_norm": 0.17351406812667847, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 0.0171, |
| "step": 9275 |
| }, |
| { |
| "epoch": 10.242290748898679, |
| "grad_norm": 0.396767795085907, |
| "learning_rate": 1.0974358974358977e-05, |
| "loss": 0.0178, |
| "step": 9300 |
| }, |
| { |
| "epoch": 10.269823788546255, |
| "grad_norm": 0.3102997839450836, |
| "learning_rate": 1.094871794871795e-05, |
| "loss": 0.0224, |
| "step": 9325 |
| }, |
| { |
| "epoch": 10.297356828193832, |
| "grad_norm": 0.27433833479881287, |
| "learning_rate": 1.0923076923076922e-05, |
| "loss": 0.0279, |
| "step": 9350 |
| }, |
| { |
| "epoch": 10.32488986784141, |
| "grad_norm": 0.4114525020122528, |
| "learning_rate": 1.0897435897435898e-05, |
| "loss": 0.0281, |
| "step": 9375 |
| }, |
| { |
| "epoch": 10.352422907488986, |
| "grad_norm": 0.2669256627559662, |
| "learning_rate": 1.0871794871794871e-05, |
| "loss": 0.0223, |
| "step": 9400 |
| }, |
| { |
| "epoch": 10.379955947136564, |
| "grad_norm": 0.1441548466682434, |
| "learning_rate": 1.0846153846153847e-05, |
| "loss": 0.0275, |
| "step": 9425 |
| }, |
| { |
| "epoch": 10.407488986784141, |
| "grad_norm": 0.25263088941574097, |
| "learning_rate": 1.082051282051282e-05, |
| "loss": 0.0287, |
| "step": 9450 |
| }, |
| { |
| "epoch": 10.435022026431717, |
| "grad_norm": 0.4364434480667114, |
| "learning_rate": 1.0794871794871796e-05, |
| "loss": 0.0257, |
| "step": 9475 |
| }, |
| { |
| "epoch": 10.462555066079295, |
| "grad_norm": 0.287691593170166, |
| "learning_rate": 1.076923076923077e-05, |
| "loss": 0.0273, |
| "step": 9500 |
| }, |
| { |
| "epoch": 10.490088105726873, |
| "grad_norm": 0.22964105010032654, |
| "learning_rate": 1.0743589743589745e-05, |
| "loss": 0.0319, |
| "step": 9525 |
| }, |
| { |
| "epoch": 10.517621145374449, |
| "grad_norm": 0.3216736316680908, |
| "learning_rate": 1.0717948717948718e-05, |
| "loss": 0.0259, |
| "step": 9550 |
| }, |
| { |
| "epoch": 10.545154185022026, |
| "grad_norm": 0.3481162190437317, |
| "learning_rate": 1.0692307692307694e-05, |
| "loss": 0.0273, |
| "step": 9575 |
| }, |
| { |
| "epoch": 10.572687224669604, |
| "grad_norm": 0.4183979630470276, |
| "learning_rate": 1.0666666666666667e-05, |
| "loss": 0.0329, |
| "step": 9600 |
| }, |
| { |
| "epoch": 10.60022026431718, |
| "grad_norm": 0.45229265093803406, |
| "learning_rate": 1.0641025641025643e-05, |
| "loss": 0.0351, |
| "step": 9625 |
| }, |
| { |
| "epoch": 10.627753303964758, |
| "grad_norm": 0.20271629095077515, |
| "learning_rate": 1.0615384615384616e-05, |
| "loss": 0.033, |
| "step": 9650 |
| }, |
| { |
| "epoch": 10.655286343612335, |
| "grad_norm": 0.2974882125854492, |
| "learning_rate": 1.058974358974359e-05, |
| "loss": 0.0336, |
| "step": 9675 |
| }, |
| { |
| "epoch": 10.682819383259911, |
| "grad_norm": 0.31492355465888977, |
| "learning_rate": 1.0564102564102565e-05, |
| "loss": 0.0318, |
| "step": 9700 |
| }, |
| { |
| "epoch": 10.710352422907489, |
| "grad_norm": 0.3608117699623108, |
| "learning_rate": 1.0538461538461539e-05, |
| "loss": 0.0329, |
| "step": 9725 |
| }, |
| { |
| "epoch": 10.737885462555067, |
| "grad_norm": 0.20358450710773468, |
| "learning_rate": 1.0512820512820514e-05, |
| "loss": 0.0322, |
| "step": 9750 |
| }, |
| { |
| "epoch": 10.765418502202643, |
| "grad_norm": 0.26170098781585693, |
| "learning_rate": 1.0487179487179488e-05, |
| "loss": 0.0279, |
| "step": 9775 |
| }, |
| { |
| "epoch": 10.79295154185022, |
| "grad_norm": 0.45118996500968933, |
| "learning_rate": 1.0461538461538463e-05, |
| "loss": 0.0304, |
| "step": 9800 |
| }, |
| { |
| "epoch": 10.820484581497798, |
| "grad_norm": 0.3625517785549164, |
| "learning_rate": 1.0435897435897437e-05, |
| "loss": 0.0314, |
| "step": 9825 |
| }, |
| { |
| "epoch": 10.848017621145374, |
| "grad_norm": 0.15762348473072052, |
| "learning_rate": 1.0410256410256412e-05, |
| "loss": 0.0235, |
| "step": 9850 |
| }, |
| { |
| "epoch": 10.875550660792952, |
| "grad_norm": 0.24998855590820312, |
| "learning_rate": 1.0384615384615386e-05, |
| "loss": 0.0278, |
| "step": 9875 |
| }, |
| { |
| "epoch": 10.90308370044053, |
| "grad_norm": 0.3237763047218323, |
| "learning_rate": 1.0358974358974361e-05, |
| "loss": 0.024, |
| "step": 9900 |
| }, |
| { |
| "epoch": 10.930616740088105, |
| "grad_norm": 0.3094446063041687, |
| "learning_rate": 1.0333333333333335e-05, |
| "loss": 0.0202, |
| "step": 9925 |
| }, |
| { |
| "epoch": 10.958149779735683, |
| "grad_norm": 0.20104403793811798, |
| "learning_rate": 1.0307692307692307e-05, |
| "loss": 0.0266, |
| "step": 9950 |
| }, |
| { |
| "epoch": 10.98568281938326, |
| "grad_norm": 0.7808817028999329, |
| "learning_rate": 1.0282051282051282e-05, |
| "loss": 0.0315, |
| "step": 9975 |
| }, |
| { |
| "epoch": 11.013215859030836, |
| "grad_norm": 0.19259458780288696, |
| "learning_rate": 1.0256410256410256e-05, |
| "loss": 0.0262, |
| "step": 10000 |
| }, |
| { |
| "epoch": 11.013215859030836, |
| "eval_cer": 25.51901230039091, |
| "eval_loss": 0.8545929193496704, |
| "eval_runtime": 1300.8852, |
| "eval_samples_per_second": 8.134, |
| "eval_steps_per_second": 2.034, |
| "eval_wer": 84.75247524752476, |
| "step": 10000 |
| }, |
| { |
| "epoch": 11.040748898678414, |
| "grad_norm": 0.31786996126174927, |
| "learning_rate": 1.0230769230769231e-05, |
| "loss": 0.0195, |
| "step": 10025 |
| }, |
| { |
| "epoch": 11.068281938325992, |
| "grad_norm": 0.22015278041362762, |
| "learning_rate": 1.0205128205128205e-05, |
| "loss": 0.02, |
| "step": 10050 |
| }, |
| { |
| "epoch": 11.095814977973568, |
| "grad_norm": 0.06974627077579498, |
| "learning_rate": 1.017948717948718e-05, |
| "loss": 0.018, |
| "step": 10075 |
| }, |
| { |
| "epoch": 11.123348017621145, |
| "grad_norm": 0.2470981329679489, |
| "learning_rate": 1.0153846153846154e-05, |
| "loss": 0.0177, |
| "step": 10100 |
| }, |
| { |
| "epoch": 11.150881057268723, |
| "grad_norm": 0.15128523111343384, |
| "learning_rate": 1.012820512820513e-05, |
| "loss": 0.0165, |
| "step": 10125 |
| }, |
| { |
| "epoch": 11.178414096916299, |
| "grad_norm": 0.19330032169818878, |
| "learning_rate": 1.0102564102564103e-05, |
| "loss": 0.0194, |
| "step": 10150 |
| }, |
| { |
| "epoch": 11.205947136563877, |
| "grad_norm": 0.6133860349655151, |
| "learning_rate": 1.0076923076923078e-05, |
| "loss": 0.0208, |
| "step": 10175 |
| }, |
| { |
| "epoch": 11.233480176211454, |
| "grad_norm": 0.1094980537891388, |
| "learning_rate": 1.0051282051282052e-05, |
| "loss": 0.0253, |
| "step": 10200 |
| }, |
| { |
| "epoch": 11.26101321585903, |
| "grad_norm": 0.21437473595142365, |
| "learning_rate": 1.0025641025641027e-05, |
| "loss": 0.0185, |
| "step": 10225 |
| }, |
| { |
| "epoch": 11.288546255506608, |
| "grad_norm": 0.12496840208768845, |
| "learning_rate": 1e-05, |
| "loss": 0.0174, |
| "step": 10250 |
| }, |
| { |
| "epoch": 11.316079295154186, |
| "grad_norm": 0.08179640024900436, |
| "learning_rate": 9.974358974358974e-06, |
| "loss": 0.0175, |
| "step": 10275 |
| }, |
| { |
| "epoch": 11.343612334801762, |
| "grad_norm": 0.19107000529766083, |
| "learning_rate": 9.94871794871795e-06, |
| "loss": 0.0192, |
| "step": 10300 |
| }, |
| { |
| "epoch": 11.37114537444934, |
| "grad_norm": 0.07791900634765625, |
| "learning_rate": 9.923076923076923e-06, |
| "loss": 0.0183, |
| "step": 10325 |
| }, |
| { |
| "epoch": 11.398678414096917, |
| "grad_norm": 0.1208205595612526, |
| "learning_rate": 9.897435897435899e-06, |
| "loss": 0.0194, |
| "step": 10350 |
| }, |
| { |
| "epoch": 11.426211453744493, |
| "grad_norm": 0.17363104224205017, |
| "learning_rate": 9.871794871794872e-06, |
| "loss": 0.0182, |
| "step": 10375 |
| }, |
| { |
| "epoch": 11.45374449339207, |
| "grad_norm": 0.15507516264915466, |
| "learning_rate": 9.846153846153848e-06, |
| "loss": 0.0211, |
| "step": 10400 |
| }, |
| { |
| "epoch": 11.481277533039648, |
| "grad_norm": 0.42732682824134827, |
| "learning_rate": 9.820512820512821e-06, |
| "loss": 0.0196, |
| "step": 10425 |
| }, |
| { |
| "epoch": 11.508810572687224, |
| "grad_norm": 0.2086770087480545, |
| "learning_rate": 9.794871794871795e-06, |
| "loss": 0.0174, |
| "step": 10450 |
| }, |
| { |
| "epoch": 11.536343612334802, |
| "grad_norm": 0.13412240147590637, |
| "learning_rate": 9.76923076923077e-06, |
| "loss": 0.0115, |
| "step": 10475 |
| }, |
| { |
| "epoch": 11.56387665198238, |
| "grad_norm": 0.5641820430755615, |
| "learning_rate": 9.743589743589744e-06, |
| "loss": 0.0213, |
| "step": 10500 |
| }, |
| { |
| "epoch": 11.591409691629956, |
| "grad_norm": 0.2280423492193222, |
| "learning_rate": 9.71794871794872e-06, |
| "loss": 0.0208, |
| "step": 10525 |
| }, |
| { |
| "epoch": 11.618942731277533, |
| "grad_norm": 0.34723925590515137, |
| "learning_rate": 9.692307692307693e-06, |
| "loss": 0.0145, |
| "step": 10550 |
| }, |
| { |
| "epoch": 11.646475770925111, |
| "grad_norm": 0.42695844173431396, |
| "learning_rate": 9.666666666666667e-06, |
| "loss": 0.0191, |
| "step": 10575 |
| }, |
| { |
| "epoch": 11.674008810572687, |
| "grad_norm": 0.179496169090271, |
| "learning_rate": 9.641025641025642e-06, |
| "loss": 0.0159, |
| "step": 10600 |
| }, |
| { |
| "epoch": 11.701541850220265, |
| "grad_norm": 0.20549049973487854, |
| "learning_rate": 9.615384615384616e-06, |
| "loss": 0.0148, |
| "step": 10625 |
| }, |
| { |
| "epoch": 11.729074889867842, |
| "grad_norm": 0.07314030826091766, |
| "learning_rate": 9.589743589743591e-06, |
| "loss": 0.0166, |
| "step": 10650 |
| }, |
| { |
| "epoch": 11.756607929515418, |
| "grad_norm": 0.1925014853477478, |
| "learning_rate": 9.564102564102565e-06, |
| "loss": 0.0156, |
| "step": 10675 |
| }, |
| { |
| "epoch": 11.784140969162996, |
| "grad_norm": 0.3135490417480469, |
| "learning_rate": 9.53846153846154e-06, |
| "loss": 0.0186, |
| "step": 10700 |
| }, |
| { |
| "epoch": 11.811674008810574, |
| "grad_norm": 0.4319511651992798, |
| "learning_rate": 9.512820512820514e-06, |
| "loss": 0.0172, |
| "step": 10725 |
| }, |
| { |
| "epoch": 11.83920704845815, |
| "grad_norm": 0.7985556125640869, |
| "learning_rate": 9.487179487179487e-06, |
| "loss": 0.0254, |
| "step": 10750 |
| }, |
| { |
| "epoch": 11.866740088105727, |
| "grad_norm": 0.0852452889084816, |
| "learning_rate": 9.461538461538463e-06, |
| "loss": 0.0207, |
| "step": 10775 |
| }, |
| { |
| "epoch": 11.894273127753303, |
| "grad_norm": 0.1884285807609558, |
| "learning_rate": 9.435897435897436e-06, |
| "loss": 0.0165, |
| "step": 10800 |
| }, |
| { |
| "epoch": 11.92180616740088, |
| "grad_norm": 0.1979898363351822, |
| "learning_rate": 9.410256410256412e-06, |
| "loss": 0.0174, |
| "step": 10825 |
| }, |
| { |
| "epoch": 11.949339207048459, |
| "grad_norm": 0.48160043358802795, |
| "learning_rate": 9.384615384615385e-06, |
| "loss": 0.0208, |
| "step": 10850 |
| }, |
| { |
| "epoch": 11.976872246696034, |
| "grad_norm": 0.3505367040634155, |
| "learning_rate": 9.358974358974359e-06, |
| "loss": 0.0234, |
| "step": 10875 |
| }, |
| { |
| "epoch": 12.004405286343612, |
| "grad_norm": 0.057797495275735855, |
| "learning_rate": 9.333333333333334e-06, |
| "loss": 0.0196, |
| "step": 10900 |
| }, |
| { |
| "epoch": 12.03193832599119, |
| "grad_norm": 0.0727895125746727, |
| "learning_rate": 9.307692307692308e-06, |
| "loss": 0.0155, |
| "step": 10925 |
| }, |
| { |
| "epoch": 12.059471365638766, |
| "grad_norm": 0.15412183105945587, |
| "learning_rate": 9.282051282051283e-06, |
| "loss": 0.0136, |
| "step": 10950 |
| }, |
| { |
| "epoch": 12.087004405286343, |
| "grad_norm": 0.11031453311443329, |
| "learning_rate": 9.256410256410257e-06, |
| "loss": 0.0097, |
| "step": 10975 |
| }, |
| { |
| "epoch": 12.114537444933921, |
| "grad_norm": 0.08627741038799286, |
| "learning_rate": 9.230769230769232e-06, |
| "loss": 0.0146, |
| "step": 11000 |
| }, |
| { |
| "epoch": 12.114537444933921, |
| "eval_cer": 23.871126752490458, |
| "eval_loss": 0.8571091890335083, |
| "eval_runtime": 1299.2634, |
| "eval_samples_per_second": 8.144, |
| "eval_steps_per_second": 2.037, |
| "eval_wer": 83.58321546440358, |
| "step": 11000 |
| }, |
| { |
| "epoch": 12.142070484581497, |
| "grad_norm": 0.17661835253238678, |
| "learning_rate": 9.205128205128206e-06, |
| "loss": 0.0164, |
| "step": 11025 |
| }, |
| { |
| "epoch": 12.169603524229075, |
| "grad_norm": 0.4166176915168762, |
| "learning_rate": 9.17948717948718e-06, |
| "loss": 0.0125, |
| "step": 11050 |
| }, |
| { |
| "epoch": 12.197136563876652, |
| "grad_norm": 0.08312999457120895, |
| "learning_rate": 9.153846153846155e-06, |
| "loss": 0.0164, |
| "step": 11075 |
| }, |
| { |
| "epoch": 12.224669603524228, |
| "grad_norm": 0.24172239005565643, |
| "learning_rate": 9.128205128205129e-06, |
| "loss": 0.0147, |
| "step": 11100 |
| }, |
| { |
| "epoch": 12.252202643171806, |
| "grad_norm": 0.054716769605875015, |
| "learning_rate": 9.102564102564104e-06, |
| "loss": 0.012, |
| "step": 11125 |
| }, |
| { |
| "epoch": 12.279735682819384, |
| "grad_norm": 0.2059546858072281, |
| "learning_rate": 9.076923076923078e-06, |
| "loss": 0.0126, |
| "step": 11150 |
| }, |
| { |
| "epoch": 12.30726872246696, |
| "grad_norm": 0.12244844436645508, |
| "learning_rate": 9.051282051282051e-06, |
| "loss": 0.0121, |
| "step": 11175 |
| }, |
| { |
| "epoch": 12.334801762114537, |
| "grad_norm": 0.10340509563684464, |
| "learning_rate": 9.025641025641027e-06, |
| "loss": 0.0124, |
| "step": 11200 |
| }, |
| { |
| "epoch": 12.362334801762115, |
| "grad_norm": 0.19950826466083527, |
| "learning_rate": 9e-06, |
| "loss": 0.0127, |
| "step": 11225 |
| }, |
| { |
| "epoch": 12.389867841409691, |
| "grad_norm": 0.17073887586593628, |
| "learning_rate": 8.974358974358976e-06, |
| "loss": 0.0109, |
| "step": 11250 |
| }, |
| { |
| "epoch": 12.417400881057269, |
| "grad_norm": 1.0863208770751953, |
| "learning_rate": 8.94871794871795e-06, |
| "loss": 0.0121, |
| "step": 11275 |
| }, |
| { |
| "epoch": 12.444933920704846, |
| "grad_norm": 0.08929897099733353, |
| "learning_rate": 8.923076923076925e-06, |
| "loss": 0.0164, |
| "step": 11300 |
| }, |
| { |
| "epoch": 12.472466960352422, |
| "grad_norm": 0.18044264614582062, |
| "learning_rate": 8.897435897435898e-06, |
| "loss": 0.0123, |
| "step": 11325 |
| }, |
| { |
| "epoch": 12.5, |
| "grad_norm": 0.1298847645521164, |
| "learning_rate": 8.871794871794872e-06, |
| "loss": 0.0167, |
| "step": 11350 |
| }, |
| { |
| "epoch": 12.527533039647578, |
| "grad_norm": 0.2778928577899933, |
| "learning_rate": 8.846153846153847e-06, |
| "loss": 0.0149, |
| "step": 11375 |
| }, |
| { |
| "epoch": 12.555066079295154, |
| "grad_norm": 0.17325860261917114, |
| "learning_rate": 8.820512820512821e-06, |
| "loss": 0.0181, |
| "step": 11400 |
| }, |
| { |
| "epoch": 12.582599118942731, |
| "grad_norm": 0.1085548922419548, |
| "learning_rate": 8.794871794871796e-06, |
| "loss": 0.0178, |
| "step": 11425 |
| }, |
| { |
| "epoch": 12.610132158590309, |
| "grad_norm": 0.1976926326751709, |
| "learning_rate": 8.76923076923077e-06, |
| "loss": 0.0163, |
| "step": 11450 |
| }, |
| { |
| "epoch": 12.637665198237885, |
| "grad_norm": 0.38330408930778503, |
| "learning_rate": 8.743589743589743e-06, |
| "loss": 0.0202, |
| "step": 11475 |
| }, |
| { |
| "epoch": 12.665198237885463, |
| "grad_norm": 0.24536395072937012, |
| "learning_rate": 8.717948717948719e-06, |
| "loss": 0.0167, |
| "step": 11500 |
| }, |
| { |
| "epoch": 12.69273127753304, |
| "grad_norm": 0.27022993564605713, |
| "learning_rate": 8.692307692307692e-06, |
| "loss": 0.0116, |
| "step": 11525 |
| }, |
| { |
| "epoch": 12.720264317180616, |
| "grad_norm": 0.33529072999954224, |
| "learning_rate": 8.666666666666668e-06, |
| "loss": 0.0116, |
| "step": 11550 |
| }, |
| { |
| "epoch": 12.747797356828194, |
| "grad_norm": 0.2724566161632538, |
| "learning_rate": 8.641025641025641e-06, |
| "loss": 0.0128, |
| "step": 11575 |
| }, |
| { |
| "epoch": 12.775330396475772, |
| "grad_norm": 0.42723408341407776, |
| "learning_rate": 8.615384615384617e-06, |
| "loss": 0.0136, |
| "step": 11600 |
| }, |
| { |
| "epoch": 12.802863436123348, |
| "grad_norm": 0.07522483170032501, |
| "learning_rate": 8.58974358974359e-06, |
| "loss": 0.0115, |
| "step": 11625 |
| }, |
| { |
| "epoch": 12.830396475770925, |
| "grad_norm": 0.3467361330986023, |
| "learning_rate": 8.564102564102564e-06, |
| "loss": 0.0136, |
| "step": 11650 |
| }, |
| { |
| "epoch": 12.857929515418503, |
| "grad_norm": 0.22382023930549622, |
| "learning_rate": 8.53846153846154e-06, |
| "loss": 0.014, |
| "step": 11675 |
| }, |
| { |
| "epoch": 12.885462555066079, |
| "grad_norm": 0.32350465655326843, |
| "learning_rate": 8.512820512820513e-06, |
| "loss": 0.0142, |
| "step": 11700 |
| }, |
| { |
| "epoch": 12.912995594713657, |
| "grad_norm": 0.09188594669103622, |
| "learning_rate": 8.487179487179488e-06, |
| "loss": 0.0137, |
| "step": 11725 |
| }, |
| { |
| "epoch": 12.940528634361234, |
| "grad_norm": 0.28958576917648315, |
| "learning_rate": 8.461538461538462e-06, |
| "loss": 0.013, |
| "step": 11750 |
| }, |
| { |
| "epoch": 12.96806167400881, |
| "grad_norm": 0.2282794862985611, |
| "learning_rate": 8.435897435897436e-06, |
| "loss": 0.0157, |
| "step": 11775 |
| }, |
| { |
| "epoch": 12.995594713656388, |
| "grad_norm": 0.4104384183883667, |
| "learning_rate": 8.410256410256411e-06, |
| "loss": 0.0166, |
| "step": 11800 |
| }, |
| { |
| "epoch": 13.023127753303966, |
| "grad_norm": 0.1922859251499176, |
| "learning_rate": 8.384615384615385e-06, |
| "loss": 0.0142, |
| "step": 11825 |
| }, |
| { |
| "epoch": 13.050660792951541, |
| "grad_norm": 0.13582871854305267, |
| "learning_rate": 8.35897435897436e-06, |
| "loss": 0.0084, |
| "step": 11850 |
| }, |
| { |
| "epoch": 13.07819383259912, |
| "grad_norm": 0.06614464521408081, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 0.0096, |
| "step": 11875 |
| }, |
| { |
| "epoch": 13.105726872246697, |
| "grad_norm": 0.05340244993567467, |
| "learning_rate": 8.307692307692309e-06, |
| "loss": 0.0101, |
| "step": 11900 |
| }, |
| { |
| "epoch": 13.133259911894273, |
| "grad_norm": 0.08093658089637756, |
| "learning_rate": 8.282051282051283e-06, |
| "loss": 0.0106, |
| "step": 11925 |
| }, |
| { |
| "epoch": 13.16079295154185, |
| "grad_norm": 0.06269955635070801, |
| "learning_rate": 8.256410256410256e-06, |
| "loss": 0.0088, |
| "step": 11950 |
| }, |
| { |
| "epoch": 13.188325991189428, |
| "grad_norm": 0.09279240667819977, |
| "learning_rate": 8.230769230769232e-06, |
| "loss": 0.0087, |
| "step": 11975 |
| }, |
| { |
| "epoch": 13.215859030837004, |
| "grad_norm": 0.7125237584114075, |
| "learning_rate": 8.205128205128205e-06, |
| "loss": 0.0089, |
| "step": 12000 |
| }, |
| { |
| "epoch": 13.215859030837004, |
| "eval_cer": 23.365584125273692, |
| "eval_loss": 0.8545632362365723, |
| "eval_runtime": 1300.1108, |
| "eval_samples_per_second": 8.139, |
| "eval_steps_per_second": 2.035, |
| "eval_wer": 82.86657237152286, |
| "step": 12000 |
| }, |
| { |
| "epoch": 13.243392070484582, |
| "grad_norm": 0.03670433908700943, |
| "learning_rate": 8.17948717948718e-06, |
| "loss": 0.0089, |
| "step": 12025 |
| }, |
| { |
| "epoch": 13.270925110132158, |
| "grad_norm": 0.047082215547561646, |
| "learning_rate": 8.153846153846154e-06, |
| "loss": 0.0082, |
| "step": 12050 |
| }, |
| { |
| "epoch": 13.298458149779735, |
| "grad_norm": 0.10568676888942719, |
| "learning_rate": 8.12820512820513e-06, |
| "loss": 0.0124, |
| "step": 12075 |
| }, |
| { |
| "epoch": 13.325991189427313, |
| "grad_norm": 0.08048704266548157, |
| "learning_rate": 8.102564102564103e-06, |
| "loss": 0.0096, |
| "step": 12100 |
| }, |
| { |
| "epoch": 13.353524229074889, |
| "grad_norm": 0.2693760395050049, |
| "learning_rate": 8.076923076923077e-06, |
| "loss": 0.0113, |
| "step": 12125 |
| }, |
| { |
| "epoch": 13.381057268722467, |
| "grad_norm": 0.10042113065719604, |
| "learning_rate": 8.051282051282052e-06, |
| "loss": 0.0098, |
| "step": 12150 |
| }, |
| { |
| "epoch": 13.408590308370044, |
| "grad_norm": 0.09456273168325424, |
| "learning_rate": 8.025641025641026e-06, |
| "loss": 0.0089, |
| "step": 12175 |
| }, |
| { |
| "epoch": 13.43612334801762, |
| "grad_norm": 0.0927928239107132, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.0085, |
| "step": 12200 |
| }, |
| { |
| "epoch": 13.463656387665198, |
| "grad_norm": 0.08245880156755447, |
| "learning_rate": 7.974358974358975e-06, |
| "loss": 0.0081, |
| "step": 12225 |
| }, |
| { |
| "epoch": 13.491189427312776, |
| "grad_norm": 0.11756213009357452, |
| "learning_rate": 7.948717948717949e-06, |
| "loss": 0.0084, |
| "step": 12250 |
| }, |
| { |
| "epoch": 13.518722466960352, |
| "grad_norm": 0.10488010197877884, |
| "learning_rate": 7.923076923076924e-06, |
| "loss": 0.0085, |
| "step": 12275 |
| }, |
| { |
| "epoch": 13.54625550660793, |
| "grad_norm": 0.26627275347709656, |
| "learning_rate": 7.897435897435898e-06, |
| "loss": 0.0104, |
| "step": 12300 |
| }, |
| { |
| "epoch": 13.573788546255507, |
| "grad_norm": 0.07299219816923141, |
| "learning_rate": 7.871794871794873e-06, |
| "loss": 0.0097, |
| "step": 12325 |
| }, |
| { |
| "epoch": 13.601321585903083, |
| "grad_norm": 0.12166466563940048, |
| "learning_rate": 7.846153846153847e-06, |
| "loss": 0.0098, |
| "step": 12350 |
| }, |
| { |
| "epoch": 13.62885462555066, |
| "grad_norm": 0.05905827134847641, |
| "learning_rate": 7.820512820512822e-06, |
| "loss": 0.0139, |
| "step": 12375 |
| }, |
| { |
| "epoch": 13.656387665198238, |
| "grad_norm": 0.2588983476161957, |
| "learning_rate": 7.794871794871796e-06, |
| "loss": 0.0122, |
| "step": 12400 |
| }, |
| { |
| "epoch": 13.683920704845814, |
| "grad_norm": 0.04871825873851776, |
| "learning_rate": 7.76923076923077e-06, |
| "loss": 0.0099, |
| "step": 12425 |
| }, |
| { |
| "epoch": 13.711453744493392, |
| "grad_norm": 0.14013290405273438, |
| "learning_rate": 7.743589743589745e-06, |
| "loss": 0.0084, |
| "step": 12450 |
| }, |
| { |
| "epoch": 13.73898678414097, |
| "grad_norm": 0.22295306622982025, |
| "learning_rate": 7.717948717948718e-06, |
| "loss": 0.0098, |
| "step": 12475 |
| }, |
| { |
| "epoch": 13.766519823788546, |
| "grad_norm": 0.1525130271911621, |
| "learning_rate": 7.692307692307694e-06, |
| "loss": 0.0094, |
| "step": 12500 |
| }, |
| { |
| "epoch": 13.794052863436123, |
| "grad_norm": 0.062416739761829376, |
| "learning_rate": 7.666666666666667e-06, |
| "loss": 0.0083, |
| "step": 12525 |
| }, |
| { |
| "epoch": 13.821585903083701, |
| "grad_norm": 0.09009592980146408, |
| "learning_rate": 7.641025641025641e-06, |
| "loss": 0.0094, |
| "step": 12550 |
| }, |
| { |
| "epoch": 13.849118942731277, |
| "grad_norm": 0.33683300018310547, |
| "learning_rate": 7.615384615384615e-06, |
| "loss": 0.0087, |
| "step": 12575 |
| }, |
| { |
| "epoch": 13.876651982378855, |
| "grad_norm": 0.029187936335802078, |
| "learning_rate": 7.58974358974359e-06, |
| "loss": 0.0103, |
| "step": 12600 |
| }, |
| { |
| "epoch": 13.904185022026432, |
| "grad_norm": 0.24236032366752625, |
| "learning_rate": 7.564102564102564e-06, |
| "loss": 0.0119, |
| "step": 12625 |
| }, |
| { |
| "epoch": 13.931718061674008, |
| "grad_norm": 0.042555954307317734, |
| "learning_rate": 7.538461538461539e-06, |
| "loss": 0.0107, |
| "step": 12650 |
| }, |
| { |
| "epoch": 13.959251101321586, |
| "grad_norm": 0.05459924414753914, |
| "learning_rate": 7.512820512820513e-06, |
| "loss": 0.0096, |
| "step": 12675 |
| }, |
| { |
| "epoch": 13.986784140969164, |
| "grad_norm": 0.3051317632198334, |
| "learning_rate": 7.487179487179488e-06, |
| "loss": 0.0115, |
| "step": 12700 |
| }, |
| { |
| "epoch": 14.01431718061674, |
| "grad_norm": 0.07105377316474915, |
| "learning_rate": 7.461538461538462e-06, |
| "loss": 0.0118, |
| "step": 12725 |
| }, |
| { |
| "epoch": 14.041850220264317, |
| "grad_norm": 0.32920604944229126, |
| "learning_rate": 7.435897435897437e-06, |
| "loss": 0.0136, |
| "step": 12750 |
| }, |
| { |
| "epoch": 14.069383259911895, |
| "grad_norm": 0.7461898922920227, |
| "learning_rate": 7.410256410256411e-06, |
| "loss": 0.0123, |
| "step": 12775 |
| }, |
| { |
| "epoch": 14.09691629955947, |
| "grad_norm": 0.08446882665157318, |
| "learning_rate": 7.384615384615386e-06, |
| "loss": 0.0086, |
| "step": 12800 |
| }, |
| { |
| "epoch": 14.124449339207048, |
| "grad_norm": 0.275642991065979, |
| "learning_rate": 7.35897435897436e-06, |
| "loss": 0.0108, |
| "step": 12825 |
| }, |
| { |
| "epoch": 14.151982378854626, |
| "grad_norm": 0.06443957984447479, |
| "learning_rate": 7.333333333333333e-06, |
| "loss": 0.0091, |
| "step": 12850 |
| }, |
| { |
| "epoch": 14.179515418502202, |
| "grad_norm": 0.11947249621152878, |
| "learning_rate": 7.307692307692308e-06, |
| "loss": 0.0093, |
| "step": 12875 |
| }, |
| { |
| "epoch": 14.20704845814978, |
| "grad_norm": 0.03607160225510597, |
| "learning_rate": 7.282051282051282e-06, |
| "loss": 0.0091, |
| "step": 12900 |
| }, |
| { |
| "epoch": 14.234581497797357, |
| "grad_norm": 0.16806413233280182, |
| "learning_rate": 7.256410256410257e-06, |
| "loss": 0.007, |
| "step": 12925 |
| }, |
| { |
| "epoch": 14.262114537444933, |
| "grad_norm": 0.08241688460111618, |
| "learning_rate": 7.230769230769231e-06, |
| "loss": 0.0068, |
| "step": 12950 |
| }, |
| { |
| "epoch": 14.289647577092511, |
| "grad_norm": 0.0358104445040226, |
| "learning_rate": 7.205128205128206e-06, |
| "loss": 0.0079, |
| "step": 12975 |
| }, |
| { |
| "epoch": 14.317180616740089, |
| "grad_norm": 0.2213512361049652, |
| "learning_rate": 7.17948717948718e-06, |
| "loss": 0.0098, |
| "step": 13000 |
| }, |
| { |
| "epoch": 14.317180616740089, |
| "eval_cer": 23.705478431327595, |
| "eval_loss": 0.8760920763015747, |
| "eval_runtime": 1295.1667, |
| "eval_samples_per_second": 8.17, |
| "eval_steps_per_second": 2.043, |
| "eval_wer": 83.88495992456389, |
| "step": 13000 |
| }, |
| { |
| "epoch": 14.344713656387665, |
| "grad_norm": 0.3628979027271271, |
| "learning_rate": 7.153846153846155e-06, |
| "loss": 0.0078, |
| "step": 13025 |
| }, |
| { |
| "epoch": 14.372246696035242, |
| "grad_norm": 0.0924341008067131, |
| "learning_rate": 7.128205128205129e-06, |
| "loss": 0.0088, |
| "step": 13050 |
| }, |
| { |
| "epoch": 14.39977973568282, |
| "grad_norm": 0.6887943148612976, |
| "learning_rate": 7.102564102564104e-06, |
| "loss": 0.0088, |
| "step": 13075 |
| }, |
| { |
| "epoch": 14.427312775330396, |
| "grad_norm": 0.3483204245567322, |
| "learning_rate": 7.076923076923078e-06, |
| "loss": 0.0085, |
| "step": 13100 |
| }, |
| { |
| "epoch": 14.454845814977974, |
| "grad_norm": 0.0400351881980896, |
| "learning_rate": 7.051282051282053e-06, |
| "loss": 0.0094, |
| "step": 13125 |
| }, |
| { |
| "epoch": 14.482378854625551, |
| "grad_norm": 0.15237635374069214, |
| "learning_rate": 7.025641025641025e-06, |
| "loss": 0.0092, |
| "step": 13150 |
| }, |
| { |
| "epoch": 14.509911894273127, |
| "grad_norm": 0.1783585399389267, |
| "learning_rate": 7e-06, |
| "loss": 0.0068, |
| "step": 13175 |
| }, |
| { |
| "epoch": 14.537444933920705, |
| "grad_norm": 0.21304604411125183, |
| "learning_rate": 6.974358974358974e-06, |
| "loss": 0.0084, |
| "step": 13200 |
| }, |
| { |
| "epoch": 14.564977973568283, |
| "grad_norm": 0.01639872044324875, |
| "learning_rate": 6.948717948717949e-06, |
| "loss": 0.0065, |
| "step": 13225 |
| }, |
| { |
| "epoch": 14.592511013215859, |
| "grad_norm": 0.19896923005580902, |
| "learning_rate": 6.923076923076923e-06, |
| "loss": 0.0062, |
| "step": 13250 |
| }, |
| { |
| "epoch": 14.620044052863436, |
| "grad_norm": 0.03251450136303902, |
| "learning_rate": 6.897435897435898e-06, |
| "loss": 0.0076, |
| "step": 13275 |
| }, |
| { |
| "epoch": 14.647577092511014, |
| "grad_norm": 0.06453995406627655, |
| "learning_rate": 6.871794871794872e-06, |
| "loss": 0.0075, |
| "step": 13300 |
| }, |
| { |
| "epoch": 14.67511013215859, |
| "grad_norm": 0.24187321960926056, |
| "learning_rate": 6.846153846153847e-06, |
| "loss": 0.0063, |
| "step": 13325 |
| }, |
| { |
| "epoch": 14.702643171806168, |
| "grad_norm": 0.3047979772090912, |
| "learning_rate": 6.820512820512821e-06, |
| "loss": 0.0091, |
| "step": 13350 |
| }, |
| { |
| "epoch": 14.730176211453745, |
| "grad_norm": 0.06145111471414566, |
| "learning_rate": 6.794871794871796e-06, |
| "loss": 0.0066, |
| "step": 13375 |
| }, |
| { |
| "epoch": 14.757709251101321, |
| "grad_norm": 0.2679343819618225, |
| "learning_rate": 6.76923076923077e-06, |
| "loss": 0.0098, |
| "step": 13400 |
| }, |
| { |
| "epoch": 14.785242290748899, |
| "grad_norm": 0.030670566484332085, |
| "learning_rate": 6.743589743589745e-06, |
| "loss": 0.0085, |
| "step": 13425 |
| }, |
| { |
| "epoch": 14.812775330396477, |
| "grad_norm": 0.332878440618515, |
| "learning_rate": 6.717948717948718e-06, |
| "loss": 0.01, |
| "step": 13450 |
| }, |
| { |
| "epoch": 14.840308370044053, |
| "grad_norm": 0.2726658582687378, |
| "learning_rate": 6.692307692307692e-06, |
| "loss": 0.009, |
| "step": 13475 |
| }, |
| { |
| "epoch": 14.86784140969163, |
| "grad_norm": 0.13588112592697144, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.0068, |
| "step": 13500 |
| }, |
| { |
| "epoch": 14.895374449339208, |
| "grad_norm": 0.09434377402067184, |
| "learning_rate": 6.641025641025641e-06, |
| "loss": 0.0083, |
| "step": 13525 |
| }, |
| { |
| "epoch": 14.922907488986784, |
| "grad_norm": 0.18095168471336365, |
| "learning_rate": 6.615384615384616e-06, |
| "loss": 0.0078, |
| "step": 13550 |
| }, |
| { |
| "epoch": 14.950440528634362, |
| "grad_norm": 0.05562664195895195, |
| "learning_rate": 6.58974358974359e-06, |
| "loss": 0.0072, |
| "step": 13575 |
| }, |
| { |
| "epoch": 14.97797356828194, |
| "grad_norm": 0.15413448214530945, |
| "learning_rate": 6.564102564102565e-06, |
| "loss": 0.0112, |
| "step": 13600 |
| }, |
| { |
| "epoch": 15.005506607929515, |
| "grad_norm": 0.12938186526298523, |
| "learning_rate": 6.538461538461539e-06, |
| "loss": 0.0079, |
| "step": 13625 |
| }, |
| { |
| "epoch": 15.033039647577093, |
| "grad_norm": 0.07106704264879227, |
| "learning_rate": 6.512820512820514e-06, |
| "loss": 0.0078, |
| "step": 13650 |
| }, |
| { |
| "epoch": 15.060572687224669, |
| "grad_norm": 0.3770430386066437, |
| "learning_rate": 6.487179487179488e-06, |
| "loss": 0.0085, |
| "step": 13675 |
| }, |
| { |
| "epoch": 15.088105726872246, |
| "grad_norm": 0.03784358873963356, |
| "learning_rate": 6.461538461538463e-06, |
| "loss": 0.007, |
| "step": 13700 |
| }, |
| { |
| "epoch": 15.115638766519824, |
| "grad_norm": 0.10024132579565048, |
| "learning_rate": 6.435897435897437e-06, |
| "loss": 0.0087, |
| "step": 13725 |
| }, |
| { |
| "epoch": 15.1431718061674, |
| "grad_norm": 0.2917975187301636, |
| "learning_rate": 6.410256410256412e-06, |
| "loss": 0.0085, |
| "step": 13750 |
| }, |
| { |
| "epoch": 15.170704845814978, |
| "grad_norm": 0.4482714533805847, |
| "learning_rate": 6.384615384615384e-06, |
| "loss": 0.0062, |
| "step": 13775 |
| }, |
| { |
| "epoch": 15.198237885462555, |
| "grad_norm": 0.04433772340416908, |
| "learning_rate": 6.358974358974359e-06, |
| "loss": 0.0052, |
| "step": 13800 |
| }, |
| { |
| "epoch": 15.225770925110131, |
| "grad_norm": 0.1545044481754303, |
| "learning_rate": 6.333333333333333e-06, |
| "loss": 0.0075, |
| "step": 13825 |
| }, |
| { |
| "epoch": 15.253303964757709, |
| "grad_norm": 0.2501870095729828, |
| "learning_rate": 6.307692307692308e-06, |
| "loss": 0.0066, |
| "step": 13850 |
| }, |
| { |
| "epoch": 15.280837004405287, |
| "grad_norm": 0.14185580611228943, |
| "learning_rate": 6.282051282051282e-06, |
| "loss": 0.0073, |
| "step": 13875 |
| }, |
| { |
| "epoch": 15.308370044052863, |
| "grad_norm": 0.166921004652977, |
| "learning_rate": 6.256410256410257e-06, |
| "loss": 0.0105, |
| "step": 13900 |
| }, |
| { |
| "epoch": 15.33590308370044, |
| "grad_norm": 0.055341098457574844, |
| "learning_rate": 6.230769230769231e-06, |
| "loss": 0.0086, |
| "step": 13925 |
| }, |
| { |
| "epoch": 15.363436123348018, |
| "grad_norm": 0.28348565101623535, |
| "learning_rate": 6.205128205128206e-06, |
| "loss": 0.0089, |
| "step": 13950 |
| }, |
| { |
| "epoch": 15.390969162995594, |
| "grad_norm": 0.10073395073413849, |
| "learning_rate": 6.17948717948718e-06, |
| "loss": 0.0084, |
| "step": 13975 |
| }, |
| { |
| "epoch": 15.418502202643172, |
| "grad_norm": 0.05402739346027374, |
| "learning_rate": 6.153846153846155e-06, |
| "loss": 0.0076, |
| "step": 14000 |
| }, |
| { |
| "epoch": 15.418502202643172, |
| "eval_cer": 23.665356000596105, |
| "eval_loss": 0.8774716854095459, |
| "eval_runtime": 1296.7729, |
| "eval_samples_per_second": 8.159, |
| "eval_steps_per_second": 2.04, |
| "eval_wer": 83.18717586044319, |
| "step": 14000 |
| }, |
| { |
| "epoch": 15.44603524229075, |
| "grad_norm": 0.09686881303787231, |
| "learning_rate": 6.128205128205129e-06, |
| "loss": 0.0078, |
| "step": 14025 |
| }, |
| { |
| "epoch": 15.473568281938325, |
| "grad_norm": 0.10279793292284012, |
| "learning_rate": 6.102564102564104e-06, |
| "loss": 0.0096, |
| "step": 14050 |
| }, |
| { |
| "epoch": 15.501101321585903, |
| "grad_norm": 0.1425463706254959, |
| "learning_rate": 6.076923076923077e-06, |
| "loss": 0.0077, |
| "step": 14075 |
| }, |
| { |
| "epoch": 15.52863436123348, |
| "grad_norm": 0.1172114908695221, |
| "learning_rate": 6.051282051282051e-06, |
| "loss": 0.0046, |
| "step": 14100 |
| }, |
| { |
| "epoch": 15.556167400881057, |
| "grad_norm": 0.03888353705406189, |
| "learning_rate": 6.025641025641026e-06, |
| "loss": 0.0052, |
| "step": 14125 |
| }, |
| { |
| "epoch": 15.583700440528634, |
| "grad_norm": 0.2044220268726349, |
| "learning_rate": 6e-06, |
| "loss": 0.0065, |
| "step": 14150 |
| }, |
| { |
| "epoch": 15.611233480176212, |
| "grad_norm": 0.18413954973220825, |
| "learning_rate": 5.974358974358975e-06, |
| "loss": 0.0051, |
| "step": 14175 |
| }, |
| { |
| "epoch": 15.638766519823788, |
| "grad_norm": 0.16837269067764282, |
| "learning_rate": 5.948717948717949e-06, |
| "loss": 0.0109, |
| "step": 14200 |
| }, |
| { |
| "epoch": 15.666299559471366, |
| "grad_norm": 0.06798551231622696, |
| "learning_rate": 5.923076923076924e-06, |
| "loss": 0.0048, |
| "step": 14225 |
| }, |
| { |
| "epoch": 15.693832599118943, |
| "grad_norm": 0.1540171355009079, |
| "learning_rate": 5.897435897435898e-06, |
| "loss": 0.0055, |
| "step": 14250 |
| }, |
| { |
| "epoch": 15.72136563876652, |
| "grad_norm": 0.028167154639959335, |
| "learning_rate": 5.871794871794873e-06, |
| "loss": 0.0045, |
| "step": 14275 |
| }, |
| { |
| "epoch": 15.748898678414097, |
| "grad_norm": 0.12515689432621002, |
| "learning_rate": 5.846153846153847e-06, |
| "loss": 0.0047, |
| "step": 14300 |
| }, |
| { |
| "epoch": 15.776431718061675, |
| "grad_norm": 0.04906751960515976, |
| "learning_rate": 5.820512820512822e-06, |
| "loss": 0.0079, |
| "step": 14325 |
| }, |
| { |
| "epoch": 15.80396475770925, |
| "grad_norm": 0.05166306719183922, |
| "learning_rate": 5.794871794871796e-06, |
| "loss": 0.0102, |
| "step": 14350 |
| }, |
| { |
| "epoch": 15.831497797356828, |
| "grad_norm": 0.11004568636417389, |
| "learning_rate": 5.769230769230769e-06, |
| "loss": 0.0067, |
| "step": 14375 |
| }, |
| { |
| "epoch": 15.859030837004406, |
| "grad_norm": 0.1999882608652115, |
| "learning_rate": 5.743589743589743e-06, |
| "loss": 0.007, |
| "step": 14400 |
| }, |
| { |
| "epoch": 15.886563876651982, |
| "grad_norm": 0.34716781973838806, |
| "learning_rate": 5.717948717948718e-06, |
| "loss": 0.0076, |
| "step": 14425 |
| }, |
| { |
| "epoch": 15.91409691629956, |
| "grad_norm": 0.31926947832107544, |
| "learning_rate": 5.692307692307692e-06, |
| "loss": 0.0073, |
| "step": 14450 |
| }, |
| { |
| "epoch": 15.941629955947137, |
| "grad_norm": 0.1822180300951004, |
| "learning_rate": 5.666666666666667e-06, |
| "loss": 0.0061, |
| "step": 14475 |
| }, |
| { |
| "epoch": 15.969162995594713, |
| "grad_norm": 0.029914982616901398, |
| "learning_rate": 5.641025641025641e-06, |
| "loss": 0.0079, |
| "step": 14500 |
| }, |
| { |
| "epoch": 15.996696035242291, |
| "grad_norm": 0.045799557119607925, |
| "learning_rate": 5.615384615384616e-06, |
| "loss": 0.0075, |
| "step": 14525 |
| }, |
| { |
| "epoch": 16.024229074889867, |
| "grad_norm": 0.2783111333847046, |
| "learning_rate": 5.58974358974359e-06, |
| "loss": 0.0081, |
| "step": 14550 |
| }, |
| { |
| "epoch": 16.051762114537446, |
| "grad_norm": 0.059823885560035706, |
| "learning_rate": 5.564102564102565e-06, |
| "loss": 0.0063, |
| "step": 14575 |
| }, |
| { |
| "epoch": 16.079295154185022, |
| "grad_norm": 0.15732045471668243, |
| "learning_rate": 5.538461538461539e-06, |
| "loss": 0.0077, |
| "step": 14600 |
| }, |
| { |
| "epoch": 16.106828193832598, |
| "grad_norm": 0.028589140623807907, |
| "learning_rate": 5.512820512820514e-06, |
| "loss": 0.0054, |
| "step": 14625 |
| }, |
| { |
| "epoch": 16.134361233480178, |
| "grad_norm": 0.10421264916658401, |
| "learning_rate": 5.487179487179488e-06, |
| "loss": 0.0048, |
| "step": 14650 |
| }, |
| { |
| "epoch": 16.161894273127754, |
| "grad_norm": 0.0383140854537487, |
| "learning_rate": 5.461538461538461e-06, |
| "loss": 0.0055, |
| "step": 14675 |
| }, |
| { |
| "epoch": 16.18942731277533, |
| "grad_norm": 0.10105977952480316, |
| "learning_rate": 5.435897435897436e-06, |
| "loss": 0.0044, |
| "step": 14700 |
| }, |
| { |
| "epoch": 16.21696035242291, |
| "grad_norm": 0.027039742097258568, |
| "learning_rate": 5.41025641025641e-06, |
| "loss": 0.0058, |
| "step": 14725 |
| }, |
| { |
| "epoch": 16.244493392070485, |
| "grad_norm": 0.037745460867881775, |
| "learning_rate": 5.384615384615385e-06, |
| "loss": 0.0048, |
| "step": 14750 |
| }, |
| { |
| "epoch": 16.27202643171806, |
| "grad_norm": 0.09512659162282944, |
| "learning_rate": 5.358974358974359e-06, |
| "loss": 0.0073, |
| "step": 14775 |
| }, |
| { |
| "epoch": 16.29955947136564, |
| "grad_norm": 0.4294408857822418, |
| "learning_rate": 5.333333333333334e-06, |
| "loss": 0.0054, |
| "step": 14800 |
| }, |
| { |
| "epoch": 16.327092511013216, |
| "grad_norm": 0.06851428002119064, |
| "learning_rate": 5.307692307692308e-06, |
| "loss": 0.0046, |
| "step": 14825 |
| }, |
| { |
| "epoch": 16.354625550660792, |
| "grad_norm": 0.021705007180571556, |
| "learning_rate": 5.282051282051283e-06, |
| "loss": 0.005, |
| "step": 14850 |
| }, |
| { |
| "epoch": 16.38215859030837, |
| "grad_norm": 0.49824318289756775, |
| "learning_rate": 5.256410256410257e-06, |
| "loss": 0.0061, |
| "step": 14875 |
| }, |
| { |
| "epoch": 16.409691629955947, |
| "grad_norm": 0.3271447718143463, |
| "learning_rate": 5.230769230769232e-06, |
| "loss": 0.0051, |
| "step": 14900 |
| }, |
| { |
| "epoch": 16.437224669603523, |
| "grad_norm": 0.09235868602991104, |
| "learning_rate": 5.205128205128206e-06, |
| "loss": 0.0037, |
| "step": 14925 |
| }, |
| { |
| "epoch": 16.464757709251103, |
| "grad_norm": 0.4005078971385956, |
| "learning_rate": 5.179487179487181e-06, |
| "loss": 0.0087, |
| "step": 14950 |
| }, |
| { |
| "epoch": 16.49229074889868, |
| "grad_norm": 0.2035885453224182, |
| "learning_rate": 5.1538461538461534e-06, |
| "loss": 0.0116, |
| "step": 14975 |
| }, |
| { |
| "epoch": 16.519823788546255, |
| "grad_norm": 0.13736362755298615, |
| "learning_rate": 5.128205128205128e-06, |
| "loss": 0.0044, |
| "step": 15000 |
| }, |
| { |
| "epoch": 16.519823788546255, |
| "eval_cer": 23.58683067187876, |
| "eval_loss": 0.8781108856201172, |
| "eval_runtime": 1297.7364, |
| "eval_samples_per_second": 8.153, |
| "eval_steps_per_second": 2.039, |
| "eval_wer": 83.07402168788308, |
| "step": 15000 |
| }, |
| { |
| "epoch": 16.547356828193834, |
| "grad_norm": 0.06933388859033585, |
| "learning_rate": 5.1025641025641024e-06, |
| "loss": 0.0075, |
| "step": 15025 |
| }, |
| { |
| "epoch": 16.57488986784141, |
| "grad_norm": 0.13286051154136658, |
| "learning_rate": 5.076923076923077e-06, |
| "loss": 0.0051, |
| "step": 15050 |
| }, |
| { |
| "epoch": 16.602422907488986, |
| "grad_norm": 0.026176370680332184, |
| "learning_rate": 5.051282051282051e-06, |
| "loss": 0.0033, |
| "step": 15075 |
| }, |
| { |
| "epoch": 16.629955947136565, |
| "grad_norm": 0.29031485319137573, |
| "learning_rate": 5.025641025641026e-06, |
| "loss": 0.0079, |
| "step": 15100 |
| }, |
| { |
| "epoch": 16.65748898678414, |
| "grad_norm": 0.020291967317461967, |
| "learning_rate": 5e-06, |
| "loss": 0.005, |
| "step": 15125 |
| }, |
| { |
| "epoch": 16.685022026431717, |
| "grad_norm": 0.14216738939285278, |
| "learning_rate": 4.974358974358975e-06, |
| "loss": 0.0033, |
| "step": 15150 |
| }, |
| { |
| "epoch": 16.712555066079297, |
| "grad_norm": 0.012860552407801151, |
| "learning_rate": 4.948717948717949e-06, |
| "loss": 0.0051, |
| "step": 15175 |
| }, |
| { |
| "epoch": 16.740088105726873, |
| "grad_norm": 0.10258791595697403, |
| "learning_rate": 4.923076923076924e-06, |
| "loss": 0.0058, |
| "step": 15200 |
| }, |
| { |
| "epoch": 16.76762114537445, |
| "grad_norm": 0.057229600846767426, |
| "learning_rate": 4.8974358974358975e-06, |
| "loss": 0.0063, |
| "step": 15225 |
| }, |
| { |
| "epoch": 16.795154185022028, |
| "grad_norm": 0.04477262496948242, |
| "learning_rate": 4.871794871794872e-06, |
| "loss": 0.0064, |
| "step": 15250 |
| }, |
| { |
| "epoch": 16.822687224669604, |
| "grad_norm": 0.05280523747205734, |
| "learning_rate": 4.8461538461538465e-06, |
| "loss": 0.005, |
| "step": 15275 |
| }, |
| { |
| "epoch": 16.85022026431718, |
| "grad_norm": 0.2496304214000702, |
| "learning_rate": 4.820512820512821e-06, |
| "loss": 0.0046, |
| "step": 15300 |
| }, |
| { |
| "epoch": 16.87775330396476, |
| "grad_norm": 0.05990584194660187, |
| "learning_rate": 4.7948717948717955e-06, |
| "loss": 0.0034, |
| "step": 15325 |
| }, |
| { |
| "epoch": 16.905286343612335, |
| "grad_norm": 0.03158105909824371, |
| "learning_rate": 4.76923076923077e-06, |
| "loss": 0.0039, |
| "step": 15350 |
| }, |
| { |
| "epoch": 16.93281938325991, |
| "grad_norm": 0.04606786370277405, |
| "learning_rate": 4.743589743589744e-06, |
| "loss": 0.0023, |
| "step": 15375 |
| }, |
| { |
| "epoch": 16.96035242290749, |
| "grad_norm": 0.06241815909743309, |
| "learning_rate": 4.717948717948718e-06, |
| "loss": 0.0043, |
| "step": 15400 |
| }, |
| { |
| "epoch": 16.987885462555067, |
| "grad_norm": 0.10293944925069809, |
| "learning_rate": 4.692307692307693e-06, |
| "loss": 0.0049, |
| "step": 15425 |
| }, |
| { |
| "epoch": 17.015418502202643, |
| "grad_norm": 0.10291476547718048, |
| "learning_rate": 4.666666666666667e-06, |
| "loss": 0.0033, |
| "step": 15450 |
| }, |
| { |
| "epoch": 17.042951541850222, |
| "grad_norm": 0.021487778052687645, |
| "learning_rate": 4.641025641025642e-06, |
| "loss": 0.0039, |
| "step": 15475 |
| }, |
| { |
| "epoch": 17.070484581497798, |
| "grad_norm": 0.0317191444337368, |
| "learning_rate": 4.615384615384616e-06, |
| "loss": 0.0041, |
| "step": 15500 |
| }, |
| { |
| "epoch": 17.098017621145374, |
| "grad_norm": 0.010435504838824272, |
| "learning_rate": 4.58974358974359e-06, |
| "loss": 0.0028, |
| "step": 15525 |
| }, |
| { |
| "epoch": 17.125550660792953, |
| "grad_norm": 0.03608101233839989, |
| "learning_rate": 4.564102564102564e-06, |
| "loss": 0.0036, |
| "step": 15550 |
| }, |
| { |
| "epoch": 17.15308370044053, |
| "grad_norm": 0.04067333787679672, |
| "learning_rate": 4.538461538461539e-06, |
| "loss": 0.0037, |
| "step": 15575 |
| }, |
| { |
| "epoch": 17.180616740088105, |
| "grad_norm": 0.011465054005384445, |
| "learning_rate": 4.512820512820513e-06, |
| "loss": 0.0029, |
| "step": 15600 |
| }, |
| { |
| "epoch": 17.208149779735685, |
| "grad_norm": 0.016354389488697052, |
| "learning_rate": 4.487179487179488e-06, |
| "loss": 0.0025, |
| "step": 15625 |
| }, |
| { |
| "epoch": 17.23568281938326, |
| "grad_norm": 0.023780396208167076, |
| "learning_rate": 4.461538461538462e-06, |
| "loss": 0.0027, |
| "step": 15650 |
| }, |
| { |
| "epoch": 17.263215859030836, |
| "grad_norm": 0.12193647772073746, |
| "learning_rate": 4.435897435897436e-06, |
| "loss": 0.0038, |
| "step": 15675 |
| }, |
| { |
| "epoch": 17.290748898678412, |
| "grad_norm": 0.20270489156246185, |
| "learning_rate": 4.4102564102564104e-06, |
| "loss": 0.0024, |
| "step": 15700 |
| }, |
| { |
| "epoch": 17.318281938325992, |
| "grad_norm": 0.21343407034873962, |
| "learning_rate": 4.384615384615385e-06, |
| "loss": 0.0033, |
| "step": 15725 |
| }, |
| { |
| "epoch": 17.345814977973568, |
| "grad_norm": 0.17167609930038452, |
| "learning_rate": 4.358974358974359e-06, |
| "loss": 0.0038, |
| "step": 15750 |
| }, |
| { |
| "epoch": 17.373348017621144, |
| "grad_norm": 0.07102500647306442, |
| "learning_rate": 4.333333333333334e-06, |
| "loss": 0.0027, |
| "step": 15775 |
| }, |
| { |
| "epoch": 17.400881057268723, |
| "grad_norm": 0.01935497298836708, |
| "learning_rate": 4.307692307692308e-06, |
| "loss": 0.003, |
| "step": 15800 |
| }, |
| { |
| "epoch": 17.4284140969163, |
| "grad_norm": 0.013304700143635273, |
| "learning_rate": 4.282051282051282e-06, |
| "loss": 0.0041, |
| "step": 15825 |
| }, |
| { |
| "epoch": 17.455947136563875, |
| "grad_norm": 0.01759173534810543, |
| "learning_rate": 4.2564102564102566e-06, |
| "loss": 0.0037, |
| "step": 15850 |
| }, |
| { |
| "epoch": 17.483480176211454, |
| "grad_norm": 0.22442099452018738, |
| "learning_rate": 4.230769230769231e-06, |
| "loss": 0.0041, |
| "step": 15875 |
| }, |
| { |
| "epoch": 17.51101321585903, |
| "grad_norm": 0.08358743786811829, |
| "learning_rate": 4.2051282051282055e-06, |
| "loss": 0.0031, |
| "step": 15900 |
| }, |
| { |
| "epoch": 17.538546255506606, |
| "grad_norm": 0.020445672795176506, |
| "learning_rate": 4.17948717948718e-06, |
| "loss": 0.0036, |
| "step": 15925 |
| }, |
| { |
| "epoch": 17.566079295154186, |
| "grad_norm": 0.27063530683517456, |
| "learning_rate": 4.1538461538461545e-06, |
| "loss": 0.0045, |
| "step": 15950 |
| }, |
| { |
| "epoch": 17.59361233480176, |
| "grad_norm": 0.16294091939926147, |
| "learning_rate": 4.128205128205128e-06, |
| "loss": 0.0043, |
| "step": 15975 |
| }, |
| { |
| "epoch": 17.621145374449338, |
| "grad_norm": 0.012531392276287079, |
| "learning_rate": 4.102564102564103e-06, |
| "loss": 0.005, |
| "step": 16000 |
| }, |
| { |
| "epoch": 17.621145374449338, |
| "eval_cer": 22.748845047172512, |
| "eval_loss": 0.8773518204689026, |
| "eval_runtime": 1297.9232, |
| "eval_samples_per_second": 8.152, |
| "eval_steps_per_second": 2.039, |
| "eval_wer": 82.3102310231023, |
| "step": 16000 |
| }, |
| { |
| "epoch": 17.648678414096917, |
| "grad_norm": 0.01578434184193611, |
| "learning_rate": 4.076923076923077e-06, |
| "loss": 0.0031, |
| "step": 16025 |
| }, |
| { |
| "epoch": 17.676211453744493, |
| "grad_norm": 0.21798987686634064, |
| "learning_rate": 4.051282051282052e-06, |
| "loss": 0.0029, |
| "step": 16050 |
| }, |
| { |
| "epoch": 17.70374449339207, |
| "grad_norm": 0.015405426733195782, |
| "learning_rate": 4.025641025641026e-06, |
| "loss": 0.0031, |
| "step": 16075 |
| }, |
| { |
| "epoch": 17.73127753303965, |
| "grad_norm": 0.0639905333518982, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.0045, |
| "step": 16100 |
| }, |
| { |
| "epoch": 17.758810572687224, |
| "grad_norm": 0.03249204158782959, |
| "learning_rate": 3.974358974358974e-06, |
| "loss": 0.0025, |
| "step": 16125 |
| }, |
| { |
| "epoch": 17.7863436123348, |
| "grad_norm": 0.007253581192344427, |
| "learning_rate": 3.948717948717949e-06, |
| "loss": 0.0026, |
| "step": 16150 |
| }, |
| { |
| "epoch": 17.81387665198238, |
| "grad_norm": 0.052789948880672455, |
| "learning_rate": 3.923076923076923e-06, |
| "loss": 0.0023, |
| "step": 16175 |
| }, |
| { |
| "epoch": 17.841409691629956, |
| "grad_norm": 0.028969332575798035, |
| "learning_rate": 3.897435897435898e-06, |
| "loss": 0.0022, |
| "step": 16200 |
| }, |
| { |
| "epoch": 17.86894273127753, |
| "grad_norm": 0.040875017642974854, |
| "learning_rate": 3.871794871794872e-06, |
| "loss": 0.0032, |
| "step": 16225 |
| }, |
| { |
| "epoch": 17.89647577092511, |
| "grad_norm": 0.021907728165388107, |
| "learning_rate": 3.846153846153847e-06, |
| "loss": 0.0048, |
| "step": 16250 |
| }, |
| { |
| "epoch": 17.924008810572687, |
| "grad_norm": 0.03670613095164299, |
| "learning_rate": 3.8205128205128204e-06, |
| "loss": 0.004, |
| "step": 16275 |
| }, |
| { |
| "epoch": 17.951541850220263, |
| "grad_norm": 0.03614376112818718, |
| "learning_rate": 3.794871794871795e-06, |
| "loss": 0.0025, |
| "step": 16300 |
| }, |
| { |
| "epoch": 17.979074889867842, |
| "grad_norm": 0.028139924630522728, |
| "learning_rate": 3.7692307692307694e-06, |
| "loss": 0.0019, |
| "step": 16325 |
| }, |
| { |
| "epoch": 18.006607929515418, |
| "grad_norm": 0.013178820721805096, |
| "learning_rate": 3.743589743589744e-06, |
| "loss": 0.0016, |
| "step": 16350 |
| }, |
| { |
| "epoch": 18.034140969162994, |
| "grad_norm": 0.014315299689769745, |
| "learning_rate": 3.7179487179487184e-06, |
| "loss": 0.0021, |
| "step": 16375 |
| }, |
| { |
| "epoch": 18.061674008810574, |
| "grad_norm": 0.15686722099781036, |
| "learning_rate": 3.692307692307693e-06, |
| "loss": 0.0019, |
| "step": 16400 |
| }, |
| { |
| "epoch": 18.08920704845815, |
| "grad_norm": 0.07895169407129288, |
| "learning_rate": 3.6666666666666666e-06, |
| "loss": 0.0032, |
| "step": 16425 |
| }, |
| { |
| "epoch": 18.116740088105725, |
| "grad_norm": 0.01955692283809185, |
| "learning_rate": 3.641025641025641e-06, |
| "loss": 0.0031, |
| "step": 16450 |
| }, |
| { |
| "epoch": 18.144273127753305, |
| "grad_norm": 0.07983898371458054, |
| "learning_rate": 3.6153846153846156e-06, |
| "loss": 0.0024, |
| "step": 16475 |
| }, |
| { |
| "epoch": 18.17180616740088, |
| "grad_norm": 0.012052379548549652, |
| "learning_rate": 3.58974358974359e-06, |
| "loss": 0.0013, |
| "step": 16500 |
| }, |
| { |
| "epoch": 18.199339207048457, |
| "grad_norm": 0.00846129097044468, |
| "learning_rate": 3.5641025641025646e-06, |
| "loss": 0.0024, |
| "step": 16525 |
| }, |
| { |
| "epoch": 18.226872246696036, |
| "grad_norm": 0.015683434903621674, |
| "learning_rate": 3.538461538461539e-06, |
| "loss": 0.0028, |
| "step": 16550 |
| }, |
| { |
| "epoch": 18.254405286343612, |
| "grad_norm": 0.0070321750827133656, |
| "learning_rate": 3.5128205128205127e-06, |
| "loss": 0.0021, |
| "step": 16575 |
| }, |
| { |
| "epoch": 18.281938325991188, |
| "grad_norm": 0.037714555859565735, |
| "learning_rate": 3.487179487179487e-06, |
| "loss": 0.0012, |
| "step": 16600 |
| }, |
| { |
| "epoch": 18.309471365638768, |
| "grad_norm": 0.010739946737885475, |
| "learning_rate": 3.4615384615384617e-06, |
| "loss": 0.0011, |
| "step": 16625 |
| }, |
| { |
| "epoch": 18.337004405286343, |
| "grad_norm": 0.009480941109359264, |
| "learning_rate": 3.435897435897436e-06, |
| "loss": 0.0018, |
| "step": 16650 |
| }, |
| { |
| "epoch": 18.36453744493392, |
| "grad_norm": 0.011291285045444965, |
| "learning_rate": 3.4102564102564107e-06, |
| "loss": 0.0023, |
| "step": 16675 |
| }, |
| { |
| "epoch": 18.3920704845815, |
| "grad_norm": 0.014357962645590305, |
| "learning_rate": 3.384615384615385e-06, |
| "loss": 0.002, |
| "step": 16700 |
| }, |
| { |
| "epoch": 18.419603524229075, |
| "grad_norm": 0.005591503344476223, |
| "learning_rate": 3.358974358974359e-06, |
| "loss": 0.0015, |
| "step": 16725 |
| }, |
| { |
| "epoch": 18.44713656387665, |
| "grad_norm": 0.014310234226286411, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.0027, |
| "step": 16750 |
| }, |
| { |
| "epoch": 18.47466960352423, |
| "grad_norm": 0.026868004351854324, |
| "learning_rate": 3.307692307692308e-06, |
| "loss": 0.0018, |
| "step": 16775 |
| }, |
| { |
| "epoch": 18.502202643171806, |
| "grad_norm": 0.011405620723962784, |
| "learning_rate": 3.2820512820512823e-06, |
| "loss": 0.0021, |
| "step": 16800 |
| }, |
| { |
| "epoch": 18.529735682819382, |
| "grad_norm": 0.007671520579606295, |
| "learning_rate": 3.256410256410257e-06, |
| "loss": 0.0021, |
| "step": 16825 |
| }, |
| { |
| "epoch": 18.55726872246696, |
| "grad_norm": 0.09985347092151642, |
| "learning_rate": 3.2307692307692313e-06, |
| "loss": 0.0045, |
| "step": 16850 |
| }, |
| { |
| "epoch": 18.584801762114537, |
| "grad_norm": 0.0127689428627491, |
| "learning_rate": 3.205128205128206e-06, |
| "loss": 0.0031, |
| "step": 16875 |
| }, |
| { |
| "epoch": 18.612334801762113, |
| "grad_norm": 0.12681765854358673, |
| "learning_rate": 3.1794871794871795e-06, |
| "loss": 0.0013, |
| "step": 16900 |
| }, |
| { |
| "epoch": 18.639867841409693, |
| "grad_norm": 0.008006567135453224, |
| "learning_rate": 3.153846153846154e-06, |
| "loss": 0.0015, |
| "step": 16925 |
| }, |
| { |
| "epoch": 18.66740088105727, |
| "grad_norm": 0.020465383306145668, |
| "learning_rate": 3.1282051282051284e-06, |
| "loss": 0.0016, |
| "step": 16950 |
| }, |
| { |
| "epoch": 18.694933920704845, |
| "grad_norm": 0.00684625469148159, |
| "learning_rate": 3.102564102564103e-06, |
| "loss": 0.0013, |
| "step": 16975 |
| }, |
| { |
| "epoch": 18.722466960352424, |
| "grad_norm": 0.011861232109367847, |
| "learning_rate": 3.0769230769230774e-06, |
| "loss": 0.0026, |
| "step": 17000 |
| }, |
| { |
| "epoch": 18.722466960352424, |
| "eval_cer": 22.531610743640595, |
| "eval_loss": 0.8913528919219971, |
| "eval_runtime": 1295.7696, |
| "eval_samples_per_second": 8.166, |
| "eval_steps_per_second": 2.042, |
| "eval_wer": 82.14992927864215, |
| "step": 17000 |
| }, |
| { |
| "epoch": 18.75, |
| "grad_norm": 0.017606543377041817, |
| "learning_rate": 3.051282051282052e-06, |
| "loss": 0.0032, |
| "step": 17025 |
| }, |
| { |
| "epoch": 18.777533039647576, |
| "grad_norm": 0.009970780462026596, |
| "learning_rate": 3.0256410256410256e-06, |
| "loss": 0.0018, |
| "step": 17050 |
| }, |
| { |
| "epoch": 18.805066079295155, |
| "grad_norm": 0.07100367546081543, |
| "learning_rate": 3e-06, |
| "loss": 0.0015, |
| "step": 17075 |
| }, |
| { |
| "epoch": 18.83259911894273, |
| "grad_norm": 0.010995105840265751, |
| "learning_rate": 2.9743589743589746e-06, |
| "loss": 0.0024, |
| "step": 17100 |
| }, |
| { |
| "epoch": 18.860132158590307, |
| "grad_norm": 0.20596833527088165, |
| "learning_rate": 2.948717948717949e-06, |
| "loss": 0.0027, |
| "step": 17125 |
| }, |
| { |
| "epoch": 18.887665198237887, |
| "grad_norm": 0.014417466707527637, |
| "learning_rate": 2.9230769230769236e-06, |
| "loss": 0.0018, |
| "step": 17150 |
| }, |
| { |
| "epoch": 18.915198237885463, |
| "grad_norm": 0.025823669508099556, |
| "learning_rate": 2.897435897435898e-06, |
| "loss": 0.0014, |
| "step": 17175 |
| }, |
| { |
| "epoch": 18.94273127753304, |
| "grad_norm": 0.009362194687128067, |
| "learning_rate": 2.8717948717948717e-06, |
| "loss": 0.0027, |
| "step": 17200 |
| }, |
| { |
| "epoch": 18.970264317180618, |
| "grad_norm": 0.07939445227384567, |
| "learning_rate": 2.846153846153846e-06, |
| "loss": 0.0017, |
| "step": 17225 |
| }, |
| { |
| "epoch": 18.997797356828194, |
| "grad_norm": 0.011641057208180428, |
| "learning_rate": 2.8205128205128207e-06, |
| "loss": 0.0013, |
| "step": 17250 |
| }, |
| { |
| "epoch": 19.02533039647577, |
| "grad_norm": 0.006191305350512266, |
| "learning_rate": 2.794871794871795e-06, |
| "loss": 0.0018, |
| "step": 17275 |
| }, |
| { |
| "epoch": 19.05286343612335, |
| "grad_norm": 0.007426435127854347, |
| "learning_rate": 2.7692307692307697e-06, |
| "loss": 0.001, |
| "step": 17300 |
| }, |
| { |
| "epoch": 19.080396475770925, |
| "grad_norm": 0.04474742338061333, |
| "learning_rate": 2.743589743589744e-06, |
| "loss": 0.003, |
| "step": 17325 |
| }, |
| { |
| "epoch": 19.1079295154185, |
| "grad_norm": 0.026934364810585976, |
| "learning_rate": 2.717948717948718e-06, |
| "loss": 0.0019, |
| "step": 17350 |
| }, |
| { |
| "epoch": 19.13546255506608, |
| "grad_norm": 0.008537307381629944, |
| "learning_rate": 2.6923076923076923e-06, |
| "loss": 0.001, |
| "step": 17375 |
| }, |
| { |
| "epoch": 19.162995594713657, |
| "grad_norm": 0.06050959601998329, |
| "learning_rate": 2.666666666666667e-06, |
| "loss": 0.0016, |
| "step": 17400 |
| }, |
| { |
| "epoch": 19.190528634361232, |
| "grad_norm": 0.010981615632772446, |
| "learning_rate": 2.6410256410256413e-06, |
| "loss": 0.0012, |
| "step": 17425 |
| }, |
| { |
| "epoch": 19.218061674008812, |
| "grad_norm": 0.01226646639406681, |
| "learning_rate": 2.615384615384616e-06, |
| "loss": 0.0012, |
| "step": 17450 |
| }, |
| { |
| "epoch": 19.245594713656388, |
| "grad_norm": 0.00811366643756628, |
| "learning_rate": 2.5897435897435903e-06, |
| "loss": 0.0013, |
| "step": 17475 |
| }, |
| { |
| "epoch": 19.273127753303964, |
| "grad_norm": 0.009269953705370426, |
| "learning_rate": 2.564102564102564e-06, |
| "loss": 0.0011, |
| "step": 17500 |
| }, |
| { |
| "epoch": 19.300660792951543, |
| "grad_norm": 0.00818623322993517, |
| "learning_rate": 2.5384615384615385e-06, |
| "loss": 0.0024, |
| "step": 17525 |
| }, |
| { |
| "epoch": 19.32819383259912, |
| "grad_norm": 0.011427664197981358, |
| "learning_rate": 2.512820512820513e-06, |
| "loss": 0.001, |
| "step": 17550 |
| }, |
| { |
| "epoch": 19.355726872246695, |
| "grad_norm": 0.009334642440080643, |
| "learning_rate": 2.4871794871794875e-06, |
| "loss": 0.0018, |
| "step": 17575 |
| }, |
| { |
| "epoch": 19.383259911894275, |
| "grad_norm": 0.0788646936416626, |
| "learning_rate": 2.461538461538462e-06, |
| "loss": 0.0014, |
| "step": 17600 |
| }, |
| { |
| "epoch": 19.41079295154185, |
| "grad_norm": 0.07220646739006042, |
| "learning_rate": 2.435897435897436e-06, |
| "loss": 0.0015, |
| "step": 17625 |
| }, |
| { |
| "epoch": 19.438325991189426, |
| "grad_norm": 0.01058241818100214, |
| "learning_rate": 2.4102564102564105e-06, |
| "loss": 0.0023, |
| "step": 17650 |
| }, |
| { |
| "epoch": 19.465859030837006, |
| "grad_norm": 0.10551930218935013, |
| "learning_rate": 2.384615384615385e-06, |
| "loss": 0.0017, |
| "step": 17675 |
| }, |
| { |
| "epoch": 19.493392070484582, |
| "grad_norm": 0.009333525784313679, |
| "learning_rate": 2.358974358974359e-06, |
| "loss": 0.0015, |
| "step": 17700 |
| }, |
| { |
| "epoch": 19.520925110132158, |
| "grad_norm": 0.011308999732136726, |
| "learning_rate": 2.3333333333333336e-06, |
| "loss": 0.0025, |
| "step": 17725 |
| }, |
| { |
| "epoch": 19.548458149779737, |
| "grad_norm": 0.009809375740587711, |
| "learning_rate": 2.307692307692308e-06, |
| "loss": 0.0014, |
| "step": 17750 |
| }, |
| { |
| "epoch": 19.575991189427313, |
| "grad_norm": 0.01724208891391754, |
| "learning_rate": 2.282051282051282e-06, |
| "loss": 0.0016, |
| "step": 17775 |
| }, |
| { |
| "epoch": 19.60352422907489, |
| "grad_norm": 0.014492113143205643, |
| "learning_rate": 2.2564102564102566e-06, |
| "loss": 0.0009, |
| "step": 17800 |
| }, |
| { |
| "epoch": 19.63105726872247, |
| "grad_norm": 0.010606609284877777, |
| "learning_rate": 2.230769230769231e-06, |
| "loss": 0.0014, |
| "step": 17825 |
| }, |
| { |
| "epoch": 19.658590308370044, |
| "grad_norm": 0.009576292708516121, |
| "learning_rate": 2.2051282051282052e-06, |
| "loss": 0.0008, |
| "step": 17850 |
| }, |
| { |
| "epoch": 19.68612334801762, |
| "grad_norm": 0.005126140080392361, |
| "learning_rate": 2.1794871794871797e-06, |
| "loss": 0.0012, |
| "step": 17875 |
| }, |
| { |
| "epoch": 19.7136563876652, |
| "grad_norm": 0.006038044113665819, |
| "learning_rate": 2.153846153846154e-06, |
| "loss": 0.0017, |
| "step": 17900 |
| }, |
| { |
| "epoch": 19.741189427312776, |
| "grad_norm": 0.15704374015331268, |
| "learning_rate": 2.1282051282051283e-06, |
| "loss": 0.0017, |
| "step": 17925 |
| }, |
| { |
| "epoch": 19.76872246696035, |
| "grad_norm": 0.008588683791458607, |
| "learning_rate": 2.1025641025641028e-06, |
| "loss": 0.0009, |
| "step": 17950 |
| }, |
| { |
| "epoch": 19.79625550660793, |
| "grad_norm": 0.023944512009620667, |
| "learning_rate": 2.0769230769230773e-06, |
| "loss": 0.0011, |
| "step": 17975 |
| }, |
| { |
| "epoch": 19.823788546255507, |
| "grad_norm": 0.013375409878790379, |
| "learning_rate": 2.0512820512820513e-06, |
| "loss": 0.0015, |
| "step": 18000 |
| }, |
| { |
| "epoch": 19.823788546255507, |
| "eval_cer": 22.31323008494492, |
| "eval_loss": 0.8889961838722229, |
| "eval_runtime": 1299.2034, |
| "eval_samples_per_second": 8.144, |
| "eval_steps_per_second": 2.037, |
| "eval_wer": 81.98962753418199, |
| "step": 18000 |
| }, |
| { |
| "epoch": 19.851321585903083, |
| "grad_norm": 0.0055061751045286655, |
| "learning_rate": 2.025641025641026e-06, |
| "loss": 0.002, |
| "step": 18025 |
| }, |
| { |
| "epoch": 19.878854625550662, |
| "grad_norm": 0.00620003929361701, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.001, |
| "step": 18050 |
| }, |
| { |
| "epoch": 19.90638766519824, |
| "grad_norm": 0.012960962951183319, |
| "learning_rate": 1.9743589743589744e-06, |
| "loss": 0.001, |
| "step": 18075 |
| }, |
| { |
| "epoch": 19.933920704845814, |
| "grad_norm": 0.00596979632973671, |
| "learning_rate": 1.948717948717949e-06, |
| "loss": 0.0016, |
| "step": 18100 |
| }, |
| { |
| "epoch": 19.961453744493394, |
| "grad_norm": 0.00921118725091219, |
| "learning_rate": 1.9230769230769234e-06, |
| "loss": 0.0015, |
| "step": 18125 |
| }, |
| { |
| "epoch": 19.98898678414097, |
| "grad_norm": 0.008434862829744816, |
| "learning_rate": 1.8974358974358975e-06, |
| "loss": 0.0012, |
| "step": 18150 |
| }, |
| { |
| "epoch": 20.016519823788546, |
| "grad_norm": 0.004028468858450651, |
| "learning_rate": 1.871794871794872e-06, |
| "loss": 0.0012, |
| "step": 18175 |
| }, |
| { |
| "epoch": 20.044052863436125, |
| "grad_norm": 0.005844887346029282, |
| "learning_rate": 1.8461538461538465e-06, |
| "loss": 0.0009, |
| "step": 18200 |
| }, |
| { |
| "epoch": 20.0715859030837, |
| "grad_norm": 0.0038028398994356394, |
| "learning_rate": 1.8205128205128205e-06, |
| "loss": 0.0008, |
| "step": 18225 |
| }, |
| { |
| "epoch": 20.099118942731277, |
| "grad_norm": 0.0037237314973026514, |
| "learning_rate": 1.794871794871795e-06, |
| "loss": 0.0009, |
| "step": 18250 |
| }, |
| { |
| "epoch": 20.126651982378856, |
| "grad_norm": 0.003872903762385249, |
| "learning_rate": 1.7692307692307695e-06, |
| "loss": 0.0008, |
| "step": 18275 |
| }, |
| { |
| "epoch": 20.154185022026432, |
| "grad_norm": 0.007000759243965149, |
| "learning_rate": 1.7435897435897436e-06, |
| "loss": 0.0007, |
| "step": 18300 |
| }, |
| { |
| "epoch": 20.181718061674008, |
| "grad_norm": 0.12840576469898224, |
| "learning_rate": 1.717948717948718e-06, |
| "loss": 0.0009, |
| "step": 18325 |
| }, |
| { |
| "epoch": 20.209251101321588, |
| "grad_norm": 0.004936088342219591, |
| "learning_rate": 1.6923076923076926e-06, |
| "loss": 0.001, |
| "step": 18350 |
| }, |
| { |
| "epoch": 20.236784140969164, |
| "grad_norm": 0.008810814470052719, |
| "learning_rate": 1.6666666666666667e-06, |
| "loss": 0.0013, |
| "step": 18375 |
| }, |
| { |
| "epoch": 20.26431718061674, |
| "grad_norm": 0.008568311110138893, |
| "learning_rate": 1.6410256410256412e-06, |
| "loss": 0.0011, |
| "step": 18400 |
| }, |
| { |
| "epoch": 20.291850220264315, |
| "grad_norm": 0.006905156187713146, |
| "learning_rate": 1.6153846153846157e-06, |
| "loss": 0.001, |
| "step": 18425 |
| }, |
| { |
| "epoch": 20.319383259911895, |
| "grad_norm": 0.010452316142618656, |
| "learning_rate": 1.5897435897435897e-06, |
| "loss": 0.0013, |
| "step": 18450 |
| }, |
| { |
| "epoch": 20.34691629955947, |
| "grad_norm": 0.1557583063840866, |
| "learning_rate": 1.5641025641025642e-06, |
| "loss": 0.001, |
| "step": 18475 |
| }, |
| { |
| "epoch": 20.374449339207047, |
| "grad_norm": 0.006585159804672003, |
| "learning_rate": 1.5384615384615387e-06, |
| "loss": 0.0012, |
| "step": 18500 |
| }, |
| { |
| "epoch": 20.401982378854626, |
| "grad_norm": 0.006628986913710833, |
| "learning_rate": 1.5128205128205128e-06, |
| "loss": 0.0018, |
| "step": 18525 |
| }, |
| { |
| "epoch": 20.429515418502202, |
| "grad_norm": 0.007554872892796993, |
| "learning_rate": 1.4871794871794873e-06, |
| "loss": 0.0008, |
| "step": 18550 |
| }, |
| { |
| "epoch": 20.457048458149778, |
| "grad_norm": 0.005659104790538549, |
| "learning_rate": 1.4615384615384618e-06, |
| "loss": 0.0011, |
| "step": 18575 |
| }, |
| { |
| "epoch": 20.484581497797357, |
| "grad_norm": 0.00462260702624917, |
| "learning_rate": 1.4358974358974359e-06, |
| "loss": 0.0007, |
| "step": 18600 |
| }, |
| { |
| "epoch": 20.512114537444933, |
| "grad_norm": 0.0087654460221529, |
| "learning_rate": 1.4102564102564104e-06, |
| "loss": 0.001, |
| "step": 18625 |
| }, |
| { |
| "epoch": 20.53964757709251, |
| "grad_norm": 0.006052209064364433, |
| "learning_rate": 1.3846153846153848e-06, |
| "loss": 0.001, |
| "step": 18650 |
| }, |
| { |
| "epoch": 20.56718061674009, |
| "grad_norm": 0.004353426396846771, |
| "learning_rate": 1.358974358974359e-06, |
| "loss": 0.0008, |
| "step": 18675 |
| }, |
| { |
| "epoch": 20.594713656387665, |
| "grad_norm": 0.08834078162908554, |
| "learning_rate": 1.3333333333333334e-06, |
| "loss": 0.0011, |
| "step": 18700 |
| }, |
| { |
| "epoch": 20.62224669603524, |
| "grad_norm": 0.006872022990137339, |
| "learning_rate": 1.307692307692308e-06, |
| "loss": 0.0008, |
| "step": 18725 |
| }, |
| { |
| "epoch": 20.64977973568282, |
| "grad_norm": 0.004435420036315918, |
| "learning_rate": 1.282051282051282e-06, |
| "loss": 0.0006, |
| "step": 18750 |
| }, |
| { |
| "epoch": 20.677312775330396, |
| "grad_norm": 0.00624303100630641, |
| "learning_rate": 1.2564102564102565e-06, |
| "loss": 0.0007, |
| "step": 18775 |
| }, |
| { |
| "epoch": 20.704845814977972, |
| "grad_norm": 0.00411389023065567, |
| "learning_rate": 1.230769230769231e-06, |
| "loss": 0.0008, |
| "step": 18800 |
| }, |
| { |
| "epoch": 20.73237885462555, |
| "grad_norm": 0.09965424239635468, |
| "learning_rate": 1.2051282051282053e-06, |
| "loss": 0.0011, |
| "step": 18825 |
| }, |
| { |
| "epoch": 20.759911894273127, |
| "grad_norm": 0.004442107398062944, |
| "learning_rate": 1.1794871794871795e-06, |
| "loss": 0.0007, |
| "step": 18850 |
| }, |
| { |
| "epoch": 20.787444933920703, |
| "grad_norm": 0.004814086947590113, |
| "learning_rate": 1.153846153846154e-06, |
| "loss": 0.0007, |
| "step": 18875 |
| }, |
| { |
| "epoch": 20.814977973568283, |
| "grad_norm": 0.006195340771228075, |
| "learning_rate": 1.1282051282051283e-06, |
| "loss": 0.0008, |
| "step": 18900 |
| }, |
| { |
| "epoch": 20.84251101321586, |
| "grad_norm": 0.006218872033059597, |
| "learning_rate": 1.1025641025641026e-06, |
| "loss": 0.001, |
| "step": 18925 |
| }, |
| { |
| "epoch": 20.870044052863435, |
| "grad_norm": 0.0083702951669693, |
| "learning_rate": 1.076923076923077e-06, |
| "loss": 0.0008, |
| "step": 18950 |
| }, |
| { |
| "epoch": 20.897577092511014, |
| "grad_norm": 0.004893292207270861, |
| "learning_rate": 1.0512820512820514e-06, |
| "loss": 0.0011, |
| "step": 18975 |
| }, |
| { |
| "epoch": 20.92511013215859, |
| "grad_norm": 0.011980378068983555, |
| "learning_rate": 1.0256410256410257e-06, |
| "loss": 0.0011, |
| "step": 19000 |
| }, |
| { |
| "epoch": 20.92511013215859, |
| "eval_cer": 22.251326906102047, |
| "eval_loss": 0.892787754535675, |
| "eval_runtime": 1296.9753, |
| "eval_samples_per_second": 8.158, |
| "eval_steps_per_second": 2.04, |
| "eval_wer": 81.62187647336162, |
| "step": 19000 |
| }, |
| { |
| "epoch": 20.952643171806166, |
| "grad_norm": 0.0054518114775419235, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 0.0008, |
| "step": 19025 |
| }, |
| { |
| "epoch": 20.980176211453745, |
| "grad_norm": 0.0044789970852434635, |
| "learning_rate": 9.743589743589745e-07, |
| "loss": 0.0016, |
| "step": 19050 |
| }, |
| { |
| "epoch": 21.00770925110132, |
| "grad_norm": 0.007212365977466106, |
| "learning_rate": 9.487179487179487e-07, |
| "loss": 0.0007, |
| "step": 19075 |
| }, |
| { |
| "epoch": 21.035242290748897, |
| "grad_norm": 0.0058553945273160934, |
| "learning_rate": 9.230769230769232e-07, |
| "loss": 0.0006, |
| "step": 19100 |
| }, |
| { |
| "epoch": 21.062775330396477, |
| "grad_norm": 0.0037513140123337507, |
| "learning_rate": 8.974358974358975e-07, |
| "loss": 0.0007, |
| "step": 19125 |
| }, |
| { |
| "epoch": 21.090308370044053, |
| "grad_norm": 0.0065911454148590565, |
| "learning_rate": 8.717948717948718e-07, |
| "loss": 0.0006, |
| "step": 19150 |
| }, |
| { |
| "epoch": 21.11784140969163, |
| "grad_norm": 0.00529465964064002, |
| "learning_rate": 8.461538461538463e-07, |
| "loss": 0.0007, |
| "step": 19175 |
| }, |
| { |
| "epoch": 21.145374449339208, |
| "grad_norm": 0.004264230374246836, |
| "learning_rate": 8.205128205128206e-07, |
| "loss": 0.0006, |
| "step": 19200 |
| }, |
| { |
| "epoch": 21.172907488986784, |
| "grad_norm": 0.0038558640517294407, |
| "learning_rate": 7.948717948717949e-07, |
| "loss": 0.0007, |
| "step": 19225 |
| }, |
| { |
| "epoch": 21.20044052863436, |
| "grad_norm": 0.0034747957251966, |
| "learning_rate": 7.692307692307694e-07, |
| "loss": 0.0006, |
| "step": 19250 |
| }, |
| { |
| "epoch": 21.22797356828194, |
| "grad_norm": 0.004676154814660549, |
| "learning_rate": 7.435897435897436e-07, |
| "loss": 0.0006, |
| "step": 19275 |
| }, |
| { |
| "epoch": 21.255506607929515, |
| "grad_norm": 0.005274807568639517, |
| "learning_rate": 7.179487179487179e-07, |
| "loss": 0.0006, |
| "step": 19300 |
| }, |
| { |
| "epoch": 21.28303964757709, |
| "grad_norm": 0.005552313756197691, |
| "learning_rate": 6.923076923076924e-07, |
| "loss": 0.0006, |
| "step": 19325 |
| }, |
| { |
| "epoch": 21.31057268722467, |
| "grad_norm": 0.0034383879974484444, |
| "learning_rate": 6.666666666666667e-07, |
| "loss": 0.0006, |
| "step": 19350 |
| }, |
| { |
| "epoch": 21.338105726872246, |
| "grad_norm": 0.00359016889706254, |
| "learning_rate": 6.41025641025641e-07, |
| "loss": 0.0006, |
| "step": 19375 |
| }, |
| { |
| "epoch": 21.365638766519822, |
| "grad_norm": 0.007120490074157715, |
| "learning_rate": 6.153846153846155e-07, |
| "loss": 0.0006, |
| "step": 19400 |
| }, |
| { |
| "epoch": 21.393171806167402, |
| "grad_norm": 0.005752895027399063, |
| "learning_rate": 5.897435897435898e-07, |
| "loss": 0.0007, |
| "step": 19425 |
| }, |
| { |
| "epoch": 21.420704845814978, |
| "grad_norm": 0.003924291115254164, |
| "learning_rate": 5.641025641025642e-07, |
| "loss": 0.0006, |
| "step": 19450 |
| }, |
| { |
| "epoch": 21.448237885462554, |
| "grad_norm": 0.0068072304129600525, |
| "learning_rate": 5.384615384615386e-07, |
| "loss": 0.0017, |
| "step": 19475 |
| }, |
| { |
| "epoch": 21.475770925110133, |
| "grad_norm": 0.003380249021574855, |
| "learning_rate": 5.128205128205128e-07, |
| "loss": 0.0008, |
| "step": 19500 |
| }, |
| { |
| "epoch": 21.50330396475771, |
| "grad_norm": 0.003210252383723855, |
| "learning_rate": 4.871794871794872e-07, |
| "loss": 0.0006, |
| "step": 19525 |
| }, |
| { |
| "epoch": 21.530837004405285, |
| "grad_norm": 0.12751127779483795, |
| "learning_rate": 4.615384615384616e-07, |
| "loss": 0.0006, |
| "step": 19550 |
| }, |
| { |
| "epoch": 21.558370044052865, |
| "grad_norm": 0.0041765086352825165, |
| "learning_rate": 4.358974358974359e-07, |
| "loss": 0.0006, |
| "step": 19575 |
| }, |
| { |
| "epoch": 21.58590308370044, |
| "grad_norm": 0.17753562331199646, |
| "learning_rate": 4.102564102564103e-07, |
| "loss": 0.0007, |
| "step": 19600 |
| }, |
| { |
| "epoch": 21.613436123348016, |
| "grad_norm": 0.003080737078562379, |
| "learning_rate": 3.846153846153847e-07, |
| "loss": 0.0007, |
| "step": 19625 |
| }, |
| { |
| "epoch": 21.640969162995596, |
| "grad_norm": 0.004255061503499746, |
| "learning_rate": 3.5897435897435896e-07, |
| "loss": 0.0008, |
| "step": 19650 |
| }, |
| { |
| "epoch": 21.66850220264317, |
| "grad_norm": 0.003112249309197068, |
| "learning_rate": 3.3333333333333335e-07, |
| "loss": 0.0006, |
| "step": 19675 |
| }, |
| { |
| "epoch": 21.696035242290748, |
| "grad_norm": 0.0038673074450343847, |
| "learning_rate": 3.0769230769230774e-07, |
| "loss": 0.0013, |
| "step": 19700 |
| }, |
| { |
| "epoch": 21.723568281938327, |
| "grad_norm": 0.004677412565797567, |
| "learning_rate": 2.820512820512821e-07, |
| "loss": 0.0009, |
| "step": 19725 |
| }, |
| { |
| "epoch": 21.751101321585903, |
| "grad_norm": 0.0037716683000326157, |
| "learning_rate": 2.564102564102564e-07, |
| "loss": 0.0006, |
| "step": 19750 |
| }, |
| { |
| "epoch": 21.77863436123348, |
| "grad_norm": 0.004941494669765234, |
| "learning_rate": 2.307692307692308e-07, |
| "loss": 0.0006, |
| "step": 19775 |
| }, |
| { |
| "epoch": 21.80616740088106, |
| "grad_norm": 0.007174940314143896, |
| "learning_rate": 2.0512820512820514e-07, |
| "loss": 0.0006, |
| "step": 19800 |
| }, |
| { |
| "epoch": 21.833700440528634, |
| "grad_norm": 0.004461018834263086, |
| "learning_rate": 1.7948717948717948e-07, |
| "loss": 0.0006, |
| "step": 19825 |
| }, |
| { |
| "epoch": 21.86123348017621, |
| "grad_norm": 0.0061052520759403706, |
| "learning_rate": 1.5384615384615387e-07, |
| "loss": 0.0006, |
| "step": 19850 |
| }, |
| { |
| "epoch": 21.88876651982379, |
| "grad_norm": 0.00467773899435997, |
| "learning_rate": 1.282051282051282e-07, |
| "loss": 0.0006, |
| "step": 19875 |
| }, |
| { |
| "epoch": 21.916299559471366, |
| "grad_norm": 0.003872169181704521, |
| "learning_rate": 1.0256410256410257e-07, |
| "loss": 0.0006, |
| "step": 19900 |
| }, |
| { |
| "epoch": 21.94383259911894, |
| "grad_norm": 0.0046131303533911705, |
| "learning_rate": 7.692307692307694e-08, |
| "loss": 0.0006, |
| "step": 19925 |
| }, |
| { |
| "epoch": 21.97136563876652, |
| "grad_norm": 0.007839877158403397, |
| "learning_rate": 5.1282051282051286e-08, |
| "loss": 0.0007, |
| "step": 19950 |
| }, |
| { |
| "epoch": 21.998898678414097, |
| "grad_norm": 0.004426113795489073, |
| "learning_rate": 2.5641025641025643e-08, |
| "loss": 0.0007, |
| "step": 19975 |
| }, |
| { |
| "epoch": 22.026431718061673, |
| "grad_norm": 0.00471852533519268, |
| "learning_rate": 0.0, |
| "loss": 0.0006, |
| "step": 20000 |
| }, |
| { |
| "epoch": 22.026431718061673, |
| "eval_cer": 22.213497185698074, |
| "eval_loss": 0.8962610960006714, |
| "eval_runtime": 1296.5309, |
| "eval_samples_per_second": 8.161, |
| "eval_steps_per_second": 2.041, |
| "eval_wer": 81.71617161716172, |
| "step": 20000 |
| }, |
| { |
| "epoch": 22.026431718061673, |
| "step": 20000, |
| "total_flos": 3.4362863729801953e+20, |
| "train_loss": 0.2747690211042762, |
| "train_runtime": 126669.5289, |
| "train_samples_per_second": 5.053, |
| "train_steps_per_second": 0.158 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 20000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 23, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.4362863729801953e+20, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|
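A minimal sketch of how a log like the one above can be read back programmatically, assuming it is the trainer_state.json file written by the Hugging Face Trainer into each checkpoint directory (the exact path below is an assumption, not taken from this document). It only relies on the "log_history" records shown above, filtering for the entries that carry evaluation metrics:

```python
import json

# Path is an assumption: trainer_state.json as saved inside a checkpoint directory.
with open("iteboshi_student_model_temp/checkpoint-19000/trainer_state.json") as f:
    state = json.load(f)

# Evaluation entries are the log_history records that contain "eval_wer";
# the interleaved training records only have "loss" / "learning_rate".
evals = [entry for entry in state["log_history"] if "eval_wer" in entry]

for entry in evals:
    print(
        f"step {entry['step']:>6}  "
        f"WER {entry['eval_wer']:6.2f}  "
        f"CER {entry['eval_cer']:6.2f}  "
        f"eval_loss {entry['eval_loss']:.4f}"
    )

# Pick the evaluation with the lowest word error rate (lower WER is better).
best = min(evals, key=lambda entry: entry["eval_wer"])
print(f"best: step {best['step']} with WER {best['eval_wer']:.2f}")
```

On the run logged above this would report step 19000 as the lowest-WER evaluation (WER 81.62, CER 22.25), with the later evaluations at steps 18000 and 20000 slightly worse on WER.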