Invalid JSON:
Unexpected token 'I', ..."ad_norm": Infinity,
"... is not valid JSON
| { | |
| "best_metric": 26.328800988875155, | |
| "best_model_checkpoint": "results/whisper-tiny/marathi/checkpoint-12000", | |
| "epoch": 14.775016789791806, | |
| "eval_steps": 1000, | |
| "global_step": 22000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 74.15543365478516, | |
| "learning_rate": 4.4e-07, | |
| "loss": 3.7766, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 39.68334197998047, | |
| "learning_rate": 9.200000000000001e-07, | |
| "loss": 3.2026, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 14.481256484985352, | |
| "learning_rate": 1.42e-06, | |
| "loss": 2.4672, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 8.309144020080566, | |
| "learning_rate": 1.9200000000000003e-06, | |
| "loss": 1.9195, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 6.247703552246094, | |
| "learning_rate": 2.42e-06, | |
| "loss": 1.5361, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 5.86753511428833, | |
| "learning_rate": 2.92e-06, | |
| "loss": 1.2775, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 5.9364752769470215, | |
| "learning_rate": 3.4200000000000007e-06, | |
| "loss": 1.0933, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 5.17349910736084, | |
| "learning_rate": 3.920000000000001e-06, | |
| "loss": 0.9505, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 5.331369400024414, | |
| "learning_rate": 4.42e-06, | |
| "loss": 0.8507, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 5.466459274291992, | |
| "learning_rate": 4.92e-06, | |
| "loss": 0.7655, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 4.921384811401367, | |
| "learning_rate": 5.420000000000001e-06, | |
| "loss": 0.7153, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 5.227000713348389, | |
| "learning_rate": 5.92e-06, | |
| "loss": 0.6886, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 4.924015045166016, | |
| "learning_rate": 6.42e-06, | |
| "loss": 0.6324, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 4.0168986320495605, | |
| "learning_rate": 6.92e-06, | |
| "loss": 0.6107, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 4.831826686859131, | |
| "learning_rate": 7.420000000000001e-06, | |
| "loss": 0.5784, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 4.7476935386657715, | |
| "learning_rate": 7.92e-06, | |
| "loss": 0.5509, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 4.2020978927612305, | |
| "learning_rate": 8.42e-06, | |
| "loss": 0.5442, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 4.830783843994141, | |
| "learning_rate": 8.920000000000001e-06, | |
| "loss": 0.5297, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 4.747669696807861, | |
| "learning_rate": 9.42e-06, | |
| "loss": 0.5059, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 4.504109859466553, | |
| "learning_rate": 9.920000000000002e-06, | |
| "loss": 0.4927, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 4.707924842834473, | |
| "learning_rate": 9.997889447236182e-06, | |
| "loss": 0.4721, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 4.621720790863037, | |
| "learning_rate": 9.995376884422112e-06, | |
| "loss": 0.464, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 4.50490140914917, | |
| "learning_rate": 9.992864321608041e-06, | |
| "loss": 0.4518, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 4.592816352844238, | |
| "learning_rate": 9.99035175879397e-06, | |
| "loss": 0.4335, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 4.791091442108154, | |
| "learning_rate": 9.9878391959799e-06, | |
| "loss": 0.4348, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 4.221704959869385, | |
| "learning_rate": 9.98532663316583e-06, | |
| "loss": 0.4203, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 4.549515724182129, | |
| "learning_rate": 9.98281407035176e-06, | |
| "loss": 0.4086, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 4.485387802124023, | |
| "learning_rate": 9.98030150753769e-06, | |
| "loss": 0.405, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 4.758955001831055, | |
| "learning_rate": 9.977788944723619e-06, | |
| "loss": 0.4016, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 4.615067005157471, | |
| "learning_rate": 9.975276381909548e-06, | |
| "loss": 0.393, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 4.661777019500732, | |
| "learning_rate": 9.972763819095477e-06, | |
| "loss": 0.3843, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 4.5793609619140625, | |
| "learning_rate": 9.970251256281408e-06, | |
| "loss": 0.3832, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 5.030839443206787, | |
| "learning_rate": 9.967738693467338e-06, | |
| "loss": 0.3789, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 4.351238250732422, | |
| "learning_rate": 9.965226130653267e-06, | |
| "loss": 0.3576, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 4.560535907745361, | |
| "learning_rate": 9.962713567839198e-06, | |
| "loss": 0.3589, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 4.39430046081543, | |
| "learning_rate": 9.960201005025126e-06, | |
| "loss": 0.3554, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 4.813572883605957, | |
| "learning_rate": 9.957688442211057e-06, | |
| "loss": 0.3561, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 3.895594358444214, | |
| "learning_rate": 9.955175879396986e-06, | |
| "loss": 0.3532, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 4.176882266998291, | |
| "learning_rate": 9.952663316582915e-06, | |
| "loss": 0.3501, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 4.483668327331543, | |
| "learning_rate": 9.950150753768845e-06, | |
| "loss": 0.3485, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_loss": 0.23381204903125763, | |
| "eval_runtime": 566.2474, | |
| "eval_samples_per_second": 2.448, | |
| "eval_steps_per_second": 2.448, | |
| "eval_wer": 47.28059332509271, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 4.013958930969238, | |
| "learning_rate": 9.947638190954774e-06, | |
| "loss": 0.3408, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 4.734582424163818, | |
| "learning_rate": 9.945125628140703e-06, | |
| "loss": 0.3313, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 4.5922722816467285, | |
| "learning_rate": 9.942613065326634e-06, | |
| "loss": 0.3364, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 3.997859001159668, | |
| "learning_rate": 9.940100502512564e-06, | |
| "loss": 0.3283, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 4.432836532592773, | |
| "learning_rate": 9.937587939698493e-06, | |
| "loss": 0.3258, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 4.074716091156006, | |
| "learning_rate": 9.935075376884424e-06, | |
| "loss": 0.3338, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 4.509114742279053, | |
| "learning_rate": 9.932562814070352e-06, | |
| "loss": 0.3121, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 4.589898586273193, | |
| "learning_rate": 9.930050251256283e-06, | |
| "loss": 0.3161, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 4.0301079750061035, | |
| "learning_rate": 9.927537688442212e-06, | |
| "loss": 0.3248, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 4.21639347076416, | |
| "learning_rate": 9.925025125628141e-06, | |
| "loss": 0.3096, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 4.40596866607666, | |
| "learning_rate": 9.922512562814072e-06, | |
| "loss": 0.3136, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 4.144809722900391, | |
| "learning_rate": 9.920000000000002e-06, | |
| "loss": 0.3068, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 3.97633695602417, | |
| "learning_rate": 9.917487437185931e-06, | |
| "loss": 0.3044, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 4.247403144836426, | |
| "learning_rate": 9.91497487437186e-06, | |
| "loss": 0.307, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 3.9145348072052, | |
| "learning_rate": 9.91246231155779e-06, | |
| "loss": 0.3007, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 4.151167869567871, | |
| "learning_rate": 9.909949748743719e-06, | |
| "loss": 0.2931, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 4.783816337585449, | |
| "learning_rate": 9.90743718592965e-06, | |
| "loss": 0.2939, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 4.319779872894287, | |
| "learning_rate": 9.904924623115578e-06, | |
| "loss": 0.294, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 4.233304500579834, | |
| "learning_rate": 9.902412060301509e-06, | |
| "loss": 0.2918, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 5.045380592346191, | |
| "learning_rate": 9.899899497487438e-06, | |
| "loss": 0.2839, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 4.171890735626221, | |
| "learning_rate": 9.897386934673367e-06, | |
| "loss": 0.277, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 5.11909818649292, | |
| "learning_rate": 9.894874371859298e-06, | |
| "loss": 0.2708, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "grad_norm": 4.329667568206787, | |
| "learning_rate": 9.892361809045228e-06, | |
| "loss": 0.278, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "grad_norm": 4.358795166015625, | |
| "learning_rate": 9.889849246231157e-06, | |
| "loss": 0.2702, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "grad_norm": 4.983689308166504, | |
| "learning_rate": 9.887336683417086e-06, | |
| "loss": 0.2594, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 4.065433502197266, | |
| "learning_rate": 9.884824120603015e-06, | |
| "loss": 0.2628, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 4.138759136199951, | |
| "learning_rate": 9.882311557788945e-06, | |
| "loss": 0.2692, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "grad_norm": 4.43567419052124, | |
| "learning_rate": 9.879798994974876e-06, | |
| "loss": 0.2688, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 4.089324951171875, | |
| "learning_rate": 9.877286432160805e-06, | |
| "loss": 0.2641, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "grad_norm": 4.174434661865234, | |
| "learning_rate": 9.874773869346734e-06, | |
| "loss": 0.2638, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "grad_norm": 4.324215888977051, | |
| "learning_rate": 9.872261306532664e-06, | |
| "loss": 0.2621, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "grad_norm": 4.167600631713867, | |
| "learning_rate": 9.869748743718593e-06, | |
| "loss": 0.2568, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "grad_norm": 4.090190410614014, | |
| "learning_rate": 9.867236180904524e-06, | |
| "loss": 0.2579, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 3.862471580505371, | |
| "learning_rate": 9.864723618090453e-06, | |
| "loss": 0.2549, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "grad_norm": 3.9046545028686523, | |
| "learning_rate": 9.862211055276383e-06, | |
| "loss": 0.2512, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 3.973026990890503, | |
| "learning_rate": 9.859698492462312e-06, | |
| "loss": 0.2535, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "grad_norm": 3.875776529312134, | |
| "learning_rate": 9.857185929648241e-06, | |
| "loss": 0.2454, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "grad_norm": 3.815830707550049, | |
| "learning_rate": 9.854673366834172e-06, | |
| "loss": 0.2509, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "grad_norm": 3.9826467037200928, | |
| "learning_rate": 9.852160804020102e-06, | |
| "loss": 0.2469, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "grad_norm": 4.199316024780273, | |
| "learning_rate": 9.849648241206031e-06, | |
| "loss": 0.2543, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "eval_loss": 0.1726374477148056, | |
| "eval_runtime": 531.5792, | |
| "eval_samples_per_second": 2.607, | |
| "eval_steps_per_second": 2.607, | |
| "eval_wer": 38.892812996644885, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "grad_norm": 4.188065052032471, | |
| "learning_rate": 9.84713567839196e-06, | |
| "loss": 0.253, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 4.348769187927246, | |
| "learning_rate": 9.84462311557789e-06, | |
| "loss": 0.2439, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "grad_norm": 4.025571823120117, | |
| "learning_rate": 9.842110552763819e-06, | |
| "loss": 0.2499, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "grad_norm": 3.441206216812134, | |
| "learning_rate": 9.83959798994975e-06, | |
| "loss": 0.2419, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "grad_norm": 4.062358856201172, | |
| "learning_rate": 9.83708542713568e-06, | |
| "loss": 0.2428, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 4.701034069061279, | |
| "learning_rate": 9.834572864321609e-06, | |
| "loss": 0.2435, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "grad_norm": 4.011937618255615, | |
| "learning_rate": 9.832060301507538e-06, | |
| "loss": 0.2429, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 3.9073057174682617, | |
| "learning_rate": 9.829547738693467e-06, | |
| "loss": 0.2371, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "grad_norm": 4.053809642791748, | |
| "learning_rate": 9.827035175879398e-06, | |
| "loss": 0.236, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "grad_norm": 3.983830690383911, | |
| "learning_rate": 9.824522613065328e-06, | |
| "loss": 0.2393, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "grad_norm": 4.095301151275635, | |
| "learning_rate": 9.822010050251257e-06, | |
| "loss": 0.2329, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "grad_norm": 3.980642318725586, | |
| "learning_rate": 9.819497487437186e-06, | |
| "loss": 0.24, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 4.499876976013184, | |
| "learning_rate": 9.816984924623116e-06, | |
| "loss": 0.2307, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 4.50550651550293, | |
| "learning_rate": 9.814472361809047e-06, | |
| "loss": 0.2336, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 4.186766147613525, | |
| "learning_rate": 9.811959798994976e-06, | |
| "loss": 0.233, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "grad_norm": 4.362492084503174, | |
| "learning_rate": 9.809447236180905e-06, | |
| "loss": 0.2281, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "grad_norm": 4.026979446411133, | |
| "learning_rate": 9.806934673366835e-06, | |
| "loss": 0.227, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "grad_norm": 3.556326389312744, | |
| "learning_rate": 9.804422110552764e-06, | |
| "loss": 0.2331, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "grad_norm": 4.109285831451416, | |
| "learning_rate": 9.801909547738693e-06, | |
| "loss": 0.2369, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "grad_norm": 4.070573329925537, | |
| "learning_rate": 9.799396984924624e-06, | |
| "loss": 0.2307, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "grad_norm": 4.282459735870361, | |
| "learning_rate": 9.796884422110554e-06, | |
| "loss": 0.2276, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "grad_norm": 3.989485263824463, | |
| "learning_rate": 9.794371859296483e-06, | |
| "loss": 0.2325, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "grad_norm": 4.217010021209717, | |
| "learning_rate": 9.791859296482414e-06, | |
| "loss": 0.222, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 4.2022199630737305, | |
| "learning_rate": 9.789346733668342e-06, | |
| "loss": 0.2281, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 3.7386114597320557, | |
| "learning_rate": 9.786834170854273e-06, | |
| "loss": 0.2165, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 4.048258304595947, | |
| "learning_rate": 9.784321608040202e-06, | |
| "loss": 0.2218, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 4.0867133140563965, | |
| "learning_rate": 9.781809045226131e-06, | |
| "loss": 0.2197, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "grad_norm": 3.8676252365112305, | |
| "learning_rate": 9.77929648241206e-06, | |
| "loss": 0.2261, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "grad_norm": 3.8840291500091553, | |
| "learning_rate": 9.77678391959799e-06, | |
| "loss": 0.2188, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "grad_norm": 4.130185127258301, | |
| "learning_rate": 9.774271356783921e-06, | |
| "loss": 0.2176, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "grad_norm": 3.8641357421875, | |
| "learning_rate": 9.77175879396985e-06, | |
| "loss": 0.2165, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 3.8261783123016357, | |
| "learning_rate": 9.76924623115578e-06, | |
| "loss": 0.2149, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 3.861722946166992, | |
| "learning_rate": 9.766733668341709e-06, | |
| "loss": 0.2122, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "grad_norm": 4.013296127319336, | |
| "learning_rate": 9.76422110552764e-06, | |
| "loss": 0.2183, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "grad_norm": 3.78545880317688, | |
| "learning_rate": 9.761708542713568e-06, | |
| "loss": 0.2151, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "grad_norm": 4.247804641723633, | |
| "learning_rate": 9.759195979899499e-06, | |
| "loss": 0.2213, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 4.548637390136719, | |
| "learning_rate": 9.756683417085428e-06, | |
| "loss": 0.2198, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "grad_norm": 3.617631435394287, | |
| "learning_rate": 9.754170854271357e-06, | |
| "loss": 0.2103, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 3.9520044326782227, | |
| "learning_rate": 9.751658291457288e-06, | |
| "loss": 0.2053, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "grad_norm": 3.903465747833252, | |
| "learning_rate": 9.749145728643216e-06, | |
| "loss": 0.1961, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "eval_loss": 0.1471080482006073, | |
| "eval_runtime": 533.9926, | |
| "eval_samples_per_second": 2.596, | |
| "eval_steps_per_second": 2.596, | |
| "eval_wer": 33.365707222320324, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 3.3298327922821045, | |
| "learning_rate": 9.746633165829147e-06, | |
| "loss": 0.1968, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "grad_norm": 3.907670259475708, | |
| "learning_rate": 9.744120603015076e-06, | |
| "loss": 0.1938, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "grad_norm": 3.819309711456299, | |
| "learning_rate": 9.741608040201006e-06, | |
| "loss": 0.1879, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 4.644184112548828, | |
| "learning_rate": 9.739095477386935e-06, | |
| "loss": 0.1933, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "grad_norm": 3.5478782653808594, | |
| "learning_rate": 9.736582914572866e-06, | |
| "loss": 0.19, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 3.4926066398620605, | |
| "learning_rate": 9.734070351758794e-06, | |
| "loss": 0.1929, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "grad_norm": 3.6318588256835938, | |
| "learning_rate": 9.731557788944725e-06, | |
| "loss": 0.1921, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "grad_norm": 4.020270824432373, | |
| "learning_rate": 9.729045226130654e-06, | |
| "loss": 0.19, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "grad_norm": 3.391878128051758, | |
| "learning_rate": 9.726532663316583e-06, | |
| "loss": 0.1912, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "grad_norm": 3.8649306297302246, | |
| "learning_rate": 9.724020100502514e-06, | |
| "loss": 0.1965, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 3.8927695751190186, | |
| "learning_rate": 9.721507537688444e-06, | |
| "loss": 0.1901, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "grad_norm": 3.7473957538604736, | |
| "learning_rate": 9.718994974874373e-06, | |
| "loss": 0.1932, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "grad_norm": 3.2613677978515625, | |
| "learning_rate": 9.716482412060302e-06, | |
| "loss": 0.1925, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "grad_norm": 4.175868988037109, | |
| "learning_rate": 9.713969849246232e-06, | |
| "loss": 0.1898, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "grad_norm": 4.236743450164795, | |
| "learning_rate": 9.711457286432163e-06, | |
| "loss": 0.1899, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "grad_norm": 4.136856555938721, | |
| "learning_rate": 9.708944723618092e-06, | |
| "loss": 0.1912, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "grad_norm": 3.826167345046997, | |
| "learning_rate": 9.706432160804021e-06, | |
| "loss": 0.1909, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 3.949150323867798, | |
| "learning_rate": 9.70391959798995e-06, | |
| "loss": 0.1889, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "grad_norm": 4.023538589477539, | |
| "learning_rate": 9.70140703517588e-06, | |
| "loss": 0.1903, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "grad_norm": 3.7844576835632324, | |
| "learning_rate": 9.698894472361809e-06, | |
| "loss": 0.1928, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "grad_norm": 3.364312171936035, | |
| "learning_rate": 9.69638190954774e-06, | |
| "loss": 0.193, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "grad_norm": 3.4202849864959717, | |
| "learning_rate": 9.69386934673367e-06, | |
| "loss": 0.186, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 3.6285476684570312, | |
| "learning_rate": 9.691356783919599e-06, | |
| "loss": 0.1863, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "grad_norm": 4.26074743270874, | |
| "learning_rate": 9.688844221105528e-06, | |
| "loss": 0.1831, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "grad_norm": 3.6059014797210693, | |
| "learning_rate": 9.686331658291457e-06, | |
| "loss": 0.182, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "grad_norm": 3.773573637008667, | |
| "learning_rate": 9.683819095477388e-06, | |
| "loss": 0.1824, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "grad_norm": 4.112974643707275, | |
| "learning_rate": 9.681306532663318e-06, | |
| "loss": 0.1828, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 3.467885732650757, | |
| "learning_rate": 9.678793969849247e-06, | |
| "loss": 0.1819, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 3.418673038482666, | |
| "learning_rate": 9.676281407035176e-06, | |
| "loss": 0.1831, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 4.263250350952148, | |
| "learning_rate": 9.673768844221106e-06, | |
| "loss": 0.186, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "grad_norm": 3.5524044036865234, | |
| "learning_rate": 9.671256281407035e-06, | |
| "loss": 0.1889, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "grad_norm": 3.693559408187866, | |
| "learning_rate": 9.668743718592966e-06, | |
| "loss": 0.1842, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "grad_norm": 3.682617425918579, | |
| "learning_rate": 9.666231155778895e-06, | |
| "loss": 0.1826, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "grad_norm": 3.4766149520874023, | |
| "learning_rate": 9.663718592964825e-06, | |
| "loss": 0.1766, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 3.3245768547058105, | |
| "learning_rate": 9.661206030150754e-06, | |
| "loss": 0.1816, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "grad_norm": 4.028345584869385, | |
| "learning_rate": 9.658693467336683e-06, | |
| "loss": 0.1803, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 3.665334463119507, | |
| "learning_rate": 9.656180904522614e-06, | |
| "loss": 0.178, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "grad_norm": 4.212314128875732, | |
| "learning_rate": 9.653668341708544e-06, | |
| "loss": 0.1818, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "grad_norm": 4.093043804168701, | |
| "learning_rate": 9.651155778894473e-06, | |
| "loss": 0.1784, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "grad_norm": 3.8363521099090576, | |
| "learning_rate": 9.648643216080404e-06, | |
| "loss": 0.1786, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "eval_loss": 0.1332446187734604, | |
| "eval_runtime": 541.0077, | |
| "eval_samples_per_second": 2.562, | |
| "eval_steps_per_second": 2.562, | |
| "eval_wer": 30.64630054741303, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "grad_norm": 3.6020116806030273, | |
| "learning_rate": 9.646130653266332e-06, | |
| "loss": 0.1755, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "grad_norm": 3.253662586212158, | |
| "learning_rate": 9.643618090452263e-06, | |
| "loss": 0.1767, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "grad_norm": 3.3012306690216064, | |
| "learning_rate": 9.641105527638192e-06, | |
| "loss": 0.1813, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 3.7597391605377197, | |
| "learning_rate": 9.638592964824121e-06, | |
| "loss": 0.1806, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "grad_norm": 3.914498805999756, | |
| "learning_rate": 9.63608040201005e-06, | |
| "loss": 0.1781, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "grad_norm": 3.5466084480285645, | |
| "learning_rate": 9.63356783919598e-06, | |
| "loss": 0.1784, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 3.5035176277160645, | |
| "learning_rate": 9.63105527638191e-06, | |
| "loss": 0.1693, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "grad_norm": 3.6211013793945312, | |
| "learning_rate": 9.62854271356784e-06, | |
| "loss": 0.1831, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 3.355555772781372, | |
| "learning_rate": 9.62603015075377e-06, | |
| "loss": 0.1737, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "grad_norm": 4.14884614944458, | |
| "learning_rate": 9.623517587939699e-06, | |
| "loss": 0.1773, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "grad_norm": 3.896099805831909, | |
| "learning_rate": 9.62100502512563e-06, | |
| "loss": 0.1712, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "grad_norm": 3.69228196144104, | |
| "learning_rate": 9.618492462311558e-06, | |
| "loss": 0.1747, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 3.4385323524475098, | |
| "learning_rate": 9.615979899497489e-06, | |
| "loss": 0.1806, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 4.37261438369751, | |
| "learning_rate": 9.613467336683418e-06, | |
| "loss": 0.1705, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "grad_norm": 3.549129009246826, | |
| "learning_rate": 9.610954773869347e-06, | |
| "loss": 0.1683, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 3.8860154151916504, | |
| "learning_rate": 9.608442211055277e-06, | |
| "loss": 0.1732, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "grad_norm": 3.8280348777770996, | |
| "learning_rate": 9.605929648241206e-06, | |
| "loss": 0.1739, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "grad_norm": 3.9021239280700684, | |
| "learning_rate": 9.603417085427137e-06, | |
| "loss": 0.1744, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "grad_norm": 3.436377763748169, | |
| "learning_rate": 9.600904522613066e-06, | |
| "loss": 0.1674, | |
| "step": 4475 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "grad_norm": 3.5898520946502686, | |
| "learning_rate": 9.598391959798996e-06, | |
| "loss": 0.1546, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "grad_norm": 3.295307159423828, | |
| "learning_rate": 9.595879396984925e-06, | |
| "loss": 0.1543, | |
| "step": 4525 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "grad_norm": 3.3402857780456543, | |
| "learning_rate": 9.593366834170856e-06, | |
| "loss": 0.1585, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "grad_norm": 3.4992740154266357, | |
| "learning_rate": 9.590854271356784e-06, | |
| "loss": 0.1584, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "grad_norm": 3.301234245300293, | |
| "learning_rate": 9.588341708542715e-06, | |
| "loss": 0.1556, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "grad_norm": 3.2298898696899414, | |
| "learning_rate": 9.585829145728644e-06, | |
| "loss": 0.1557, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "grad_norm": 3.81208872795105, | |
| "learning_rate": 9.583316582914573e-06, | |
| "loss": 0.1488, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "grad_norm": 3.7610058784484863, | |
| "learning_rate": 9.580804020100504e-06, | |
| "loss": 0.1537, | |
| "step": 4675 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "grad_norm": 3.394169569015503, | |
| "learning_rate": 9.578291457286432e-06, | |
| "loss": 0.1546, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "grad_norm": 3.5936498641967773, | |
| "learning_rate": 9.575778894472363e-06, | |
| "loss": 0.1544, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "grad_norm": 3.714808464050293, | |
| "learning_rate": 9.573266331658292e-06, | |
| "loss": 0.1554, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "grad_norm": 3.731008768081665, | |
| "learning_rate": 9.570753768844222e-06, | |
| "loss": 0.157, | |
| "step": 4775 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "grad_norm": 3.4987032413482666, | |
| "learning_rate": 9.568241206030151e-06, | |
| "loss": 0.1506, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "grad_norm": 3.487567186355591, | |
| "learning_rate": 9.565728643216082e-06, | |
| "loss": 0.1553, | |
| "step": 4825 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "grad_norm": 3.537971258163452, | |
| "learning_rate": 9.563216080402011e-06, | |
| "loss": 0.1518, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "grad_norm": 3.5595390796661377, | |
| "learning_rate": 9.56070351758794e-06, | |
| "loss": 0.1519, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "grad_norm": 3.397580146789551, | |
| "learning_rate": 9.55819095477387e-06, | |
| "loss": 0.1516, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "grad_norm": 3.758497714996338, | |
| "learning_rate": 9.5556783919598e-06, | |
| "loss": 0.1562, | |
| "step": 4925 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "grad_norm": 3.1611812114715576, | |
| "learning_rate": 9.55316582914573e-06, | |
| "loss": 0.1522, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "grad_norm": 3.4654600620269775, | |
| "learning_rate": 9.550653266331658e-06, | |
| "loss": 0.1544, | |
| "step": 4975 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "grad_norm": 3.0207717418670654, | |
| "learning_rate": 9.548140703517589e-06, | |
| "loss": 0.1574, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "eval_loss": 0.1250012367963791, | |
| "eval_runtime": 532.3886, | |
| "eval_samples_per_second": 2.603, | |
| "eval_steps_per_second": 2.603, | |
| "eval_wer": 28.50962387427159, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "grad_norm": 3.4317171573638916, | |
| "learning_rate": 9.545628140703518e-06, | |
| "loss": 0.1563, | |
| "step": 5025 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "grad_norm": 3.7062742710113525, | |
| "learning_rate": 9.543115577889448e-06, | |
| "loss": 0.1559, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "grad_norm": 3.580521821975708, | |
| "learning_rate": 9.540603015075379e-06, | |
| "loss": 0.1521, | |
| "step": 5075 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "grad_norm": 3.364760160446167, | |
| "learning_rate": 9.538090452261308e-06, | |
| "loss": 0.1637, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "grad_norm": 3.812782049179077, | |
| "learning_rate": 9.535577889447237e-06, | |
| "loss": 0.1564, | |
| "step": 5125 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "grad_norm": 3.065197229385376, | |
| "learning_rate": 9.533065326633166e-06, | |
| "loss": 0.1484, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "grad_norm": 3.456214427947998, | |
| "learning_rate": 9.530552763819096e-06, | |
| "loss": 0.1517, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "grad_norm": 3.731849193572998, | |
| "learning_rate": 9.528040201005025e-06, | |
| "loss": 0.1499, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "grad_norm": 4.133150577545166, | |
| "learning_rate": 9.525527638190956e-06, | |
| "loss": 0.1475, | |
| "step": 5225 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "grad_norm": 3.353069543838501, | |
| "learning_rate": 9.523015075376885e-06, | |
| "loss": 0.1479, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "grad_norm": 3.885782480239868, | |
| "learning_rate": 9.520502512562815e-06, | |
| "loss": 0.1458, | |
| "step": 5275 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "grad_norm": 3.5367889404296875, | |
| "learning_rate": 9.517989949748744e-06, | |
| "loss": 0.1531, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "grad_norm": 3.8021907806396484, | |
| "learning_rate": 9.515477386934673e-06, | |
| "loss": 0.1498, | |
| "step": 5325 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "grad_norm": 3.308176279067993, | |
| "learning_rate": 9.512964824120604e-06, | |
| "loss": 0.1519, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "grad_norm": 3.668410539627075, | |
| "learning_rate": 9.510452261306534e-06, | |
| "loss": 0.1526, | |
| "step": 5375 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "grad_norm": 3.228257417678833, | |
| "learning_rate": 9.507939698492463e-06, | |
| "loss": 0.1509, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "grad_norm": 3.433962345123291, | |
| "learning_rate": 9.505427135678392e-06, | |
| "loss": 0.1521, | |
| "step": 5425 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "grad_norm": 3.707969903945923, | |
| "learning_rate": 9.502914572864322e-06, | |
| "loss": 0.1439, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "grad_norm": 3.447314739227295, | |
| "learning_rate": 9.500402010050253e-06, | |
| "loss": 0.1473, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "grad_norm": 3.579751968383789, | |
| "learning_rate": 9.497889447236182e-06, | |
| "loss": 0.1507, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "grad_norm": 3.470454454421997, | |
| "learning_rate": 9.495376884422111e-06, | |
| "loss": 0.1473, | |
| "step": 5525 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "grad_norm": 3.2754967212677, | |
| "learning_rate": 9.49286432160804e-06, | |
| "loss": 0.1458, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "grad_norm": 3.724622964859009, | |
| "learning_rate": 9.49035175879397e-06, | |
| "loss": 0.1474, | |
| "step": 5575 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "grad_norm": 3.176765203475952, | |
| "learning_rate": 9.4878391959799e-06, | |
| "loss": 0.1484, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "grad_norm": 3.3496909141540527, | |
| "learning_rate": 9.48532663316583e-06, | |
| "loss": 0.1485, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "grad_norm": 3.3814542293548584, | |
| "learning_rate": 9.48281407035176e-06, | |
| "loss": 0.1491, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "grad_norm": 3.9236228466033936, | |
| "learning_rate": 9.480301507537689e-06, | |
| "loss": 0.1522, | |
| "step": 5675 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "grad_norm": 3.3441123962402344, | |
| "learning_rate": 9.47778894472362e-06, | |
| "loss": 0.1442, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "grad_norm": 3.3952231407165527, | |
| "learning_rate": 9.475276381909548e-06, | |
| "loss": 0.1487, | |
| "step": 5725 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "grad_norm": 3.1410765647888184, | |
| "learning_rate": 9.472763819095479e-06, | |
| "loss": 0.1522, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "grad_norm": 3.267335891723633, | |
| "learning_rate": 9.470251256281408e-06, | |
| "loss": 0.1425, | |
| "step": 5775 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "grad_norm": 3.547773838043213, | |
| "learning_rate": 9.467738693467337e-06, | |
| "loss": 0.1416, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "grad_norm": 3.4462673664093018, | |
| "learning_rate": 9.465226130653267e-06, | |
| "loss": 0.1453, | |
| "step": 5825 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "grad_norm": 3.4584672451019287, | |
| "learning_rate": 9.462713567839196e-06, | |
| "loss": 0.1445, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "grad_norm": 3.501403331756592, | |
| "learning_rate": 9.460201005025127e-06, | |
| "loss": 0.1423, | |
| "step": 5875 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "grad_norm": 3.912052631378174, | |
| "learning_rate": 9.457688442211056e-06, | |
| "loss": 0.1481, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "grad_norm": 3.257798433303833, | |
| "learning_rate": 9.455175879396986e-06, | |
| "loss": 0.1385, | |
| "step": 5925 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 3.7633163928985596, | |
| "learning_rate": 9.452663316582915e-06, | |
| "loss": 0.1444, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "grad_norm": 3.1884329319000244, | |
| "learning_rate": 9.450150753768846e-06, | |
| "loss": 0.1314, | |
| "step": 5975 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "grad_norm": 3.5105104446411133, | |
| "learning_rate": 9.447638190954774e-06, | |
| "loss": 0.1284, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "eval_loss": 0.11968862265348434, | |
| "eval_runtime": 533.7582, | |
| "eval_samples_per_second": 2.597, | |
| "eval_steps_per_second": 2.597, | |
| "eval_wer": 27.644358114073814, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "grad_norm": 3.4444563388824463, | |
| "learning_rate": 9.445125628140705e-06, | |
| "loss": 0.1278, | |
| "step": 6025 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "grad_norm": 3.446941375732422, | |
| "learning_rate": 9.442613065326634e-06, | |
| "loss": 0.1316, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "grad_norm": 3.251770496368408, | |
| "learning_rate": 9.440100502512563e-06, | |
| "loss": 0.1289, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "grad_norm": 3.1929450035095215, | |
| "learning_rate": 9.437587939698494e-06, | |
| "loss": 0.1257, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "grad_norm": 3.137993097305298, | |
| "learning_rate": 9.435075376884422e-06, | |
| "loss": 0.1259, | |
| "step": 6125 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "grad_norm": 3.5924248695373535, | |
| "learning_rate": 9.432562814070353e-06, | |
| "loss": 0.1246, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "grad_norm": 3.7657840251922607, | |
| "learning_rate": 9.430050251256282e-06, | |
| "loss": 0.1263, | |
| "step": 6175 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "grad_norm": 3.8803839683532715, | |
| "learning_rate": 9.427537688442212e-06, | |
| "loss": 0.1278, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "grad_norm": 3.049147844314575, | |
| "learning_rate": 9.425025125628141e-06, | |
| "loss": 0.1248, | |
| "step": 6225 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "grad_norm": 3.5847809314727783, | |
| "learning_rate": 9.422512562814072e-06, | |
| "loss": 0.1326, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "grad_norm": 3.208193063735962, | |
| "learning_rate": 9.42e-06, | |
| "loss": 0.1278, | |
| "step": 6275 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "grad_norm": 3.787940740585327, | |
| "learning_rate": 9.41748743718593e-06, | |
| "loss": 0.1286, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "grad_norm": 2.801053762435913, | |
| "learning_rate": 9.41497487437186e-06, | |
| "loss": 0.1309, | |
| "step": 6325 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "grad_norm": 3.1014838218688965, | |
| "learning_rate": 9.41246231155779e-06, | |
| "loss": 0.1265, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "grad_norm": 3.3319690227508545, | |
| "learning_rate": 9.40994974874372e-06, | |
| "loss": 0.1273, | |
| "step": 6375 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "grad_norm": 3.366464376449585, | |
| "learning_rate": 9.407437185929648e-06, | |
| "loss": 0.1266, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "grad_norm": 3.5356907844543457, | |
| "learning_rate": 9.404924623115579e-06, | |
| "loss": 0.13, | |
| "step": 6425 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "grad_norm": 3.325680732727051, | |
| "learning_rate": 9.402412060301508e-06, | |
| "loss": 0.1262, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "grad_norm": 3.4266843795776367, | |
| "learning_rate": 9.399899497487438e-06, | |
| "loss": 0.1284, | |
| "step": 6475 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "grad_norm": 3.1395492553710938, | |
| "learning_rate": 9.397386934673369e-06, | |
| "loss": 0.1276, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "grad_norm": 3.323065757751465, | |
| "learning_rate": 9.394874371859298e-06, | |
| "loss": 0.1246, | |
| "step": 6525 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "grad_norm": 3.3577070236206055, | |
| "learning_rate": 9.392361809045227e-06, | |
| "loss": 0.1326, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "grad_norm": 3.4483211040496826, | |
| "learning_rate": 9.389849246231157e-06, | |
| "loss": 0.1287, | |
| "step": 6575 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "grad_norm": 3.939202308654785, | |
| "learning_rate": 9.387336683417086e-06, | |
| "loss": 0.1295, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "grad_norm": 3.5882346630096436, | |
| "learning_rate": 9.384824120603015e-06, | |
| "loss": 0.1257, | |
| "step": 6625 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "grad_norm": 3.9268131256103516, | |
| "learning_rate": 9.382311557788946e-06, | |
| "loss": 0.1308, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "grad_norm": 3.2181479930877686, | |
| "learning_rate": 9.379798994974874e-06, | |
| "loss": 0.1289, | |
| "step": 6675 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "grad_norm": 3.4467923641204834, | |
| "learning_rate": 9.377286432160805e-06, | |
| "loss": 0.1286, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "grad_norm": 3.227398157119751, | |
| "learning_rate": 9.374773869346734e-06, | |
| "loss": 0.1279, | |
| "step": 6725 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "grad_norm": 3.5086820125579834, | |
| "learning_rate": 9.372261306532664e-06, | |
| "loss": 0.1295, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "grad_norm": 3.0712101459503174, | |
| "learning_rate": 9.369748743718595e-06, | |
| "loss": 0.1285, | |
| "step": 6775 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "grad_norm": 3.3961784839630127, | |
| "learning_rate": 9.367236180904524e-06, | |
| "loss": 0.1311, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "grad_norm": 3.6800429821014404, | |
| "learning_rate": 9.364723618090453e-06, | |
| "loss": 0.1284, | |
| "step": 6825 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "grad_norm": 3.6793227195739746, | |
| "learning_rate": 9.362211055276383e-06, | |
| "loss": 0.1311, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "grad_norm": 3.1020681858062744, | |
| "learning_rate": 9.359698492462312e-06, | |
| "loss": 0.1287, | |
| "step": 6875 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "grad_norm": 3.738802909851074, | |
| "learning_rate": 9.357185929648241e-06, | |
| "loss": 0.1241, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "grad_norm": 3.344667911529541, | |
| "learning_rate": 9.354673366834172e-06, | |
| "loss": 0.1247, | |
| "step": 6925 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "grad_norm": 3.011655330657959, | |
| "learning_rate": 9.352160804020101e-06, | |
| "loss": 0.1237, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "grad_norm": 3.486971139907837, | |
| "learning_rate": 9.34964824120603e-06, | |
| "loss": 0.1275, | |
| "step": 6975 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "grad_norm": 3.265568971633911, | |
| "learning_rate": 9.34713567839196e-06, | |
| "loss": 0.1216, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "eval_loss": 0.11660390347242355, | |
| "eval_runtime": 534.0823, | |
| "eval_samples_per_second": 2.595, | |
| "eval_steps_per_second": 2.595, | |
| "eval_wer": 26.823238566131025, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "grad_norm": 3.0894901752471924, | |
| "learning_rate": 9.34462311557789e-06, | |
| "loss": 0.1258, | |
| "step": 7025 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "grad_norm": 3.5530054569244385, | |
| "learning_rate": 9.34211055276382e-06, | |
| "loss": 0.1294, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "grad_norm": 3.127763271331787, | |
| "learning_rate": 9.33959798994975e-06, | |
| "loss": 0.129, | |
| "step": 7075 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "grad_norm": 3.453204393386841, | |
| "learning_rate": 9.337085427135679e-06, | |
| "loss": 0.1264, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "grad_norm": 3.470991611480713, | |
| "learning_rate": 9.334572864321608e-06, | |
| "loss": 0.1272, | |
| "step": 7125 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 3.498213768005371, | |
| "learning_rate": 9.332060301507538e-06, | |
| "loss": 0.1257, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "grad_norm": 3.052225351333618, | |
| "learning_rate": 9.329547738693469e-06, | |
| "loss": 0.1242, | |
| "step": 7175 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "grad_norm": 2.9512875080108643, | |
| "learning_rate": 9.327035175879398e-06, | |
| "loss": 0.1226, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "grad_norm": 3.124257802963257, | |
| "learning_rate": 9.324522613065327e-06, | |
| "loss": 0.1276, | |
| "step": 7225 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "grad_norm": 3.763948678970337, | |
| "learning_rate": 9.322010050251257e-06, | |
| "loss": 0.123, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "grad_norm": 3.859360694885254, | |
| "learning_rate": 9.319497487437186e-06, | |
| "loss": 0.1288, | |
| "step": 7275 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "grad_norm": 3.406261682510376, | |
| "learning_rate": 9.316984924623115e-06, | |
| "loss": 0.1268, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "grad_norm": 3.5981762409210205, | |
| "learning_rate": 9.314472361809046e-06, | |
| "loss": 0.1267, | |
| "step": 7325 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "grad_norm": 3.2677414417266846, | |
| "learning_rate": 9.311959798994976e-06, | |
| "loss": 0.1228, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "grad_norm": 3.4176025390625, | |
| "learning_rate": 9.309447236180905e-06, | |
| "loss": 0.1292, | |
| "step": 7375 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "grad_norm": 3.702085018157959, | |
| "learning_rate": 9.306934673366836e-06, | |
| "loss": 0.1212, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "grad_norm": 3.075143337249756, | |
| "learning_rate": 9.304422110552764e-06, | |
| "loss": 0.1208, | |
| "step": 7425 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 2.96437406539917, | |
| "learning_rate": 9.301909547738695e-06, | |
| "loss": 0.1223, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "grad_norm": 3.359867572784424, | |
| "learning_rate": 9.299396984924624e-06, | |
| "loss": 0.1083, | |
| "step": 7475 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "grad_norm": 3.1340601444244385, | |
| "learning_rate": 9.296884422110553e-06, | |
| "loss": 0.1088, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "grad_norm": 3.4933323860168457, | |
| "learning_rate": 9.294371859296483e-06, | |
| "loss": 0.1103, | |
| "step": 7525 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "grad_norm": 2.8419055938720703, | |
| "learning_rate": 9.291859296482412e-06, | |
| "loss": 0.1074, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 5.09, | |
| "grad_norm": 2.699908971786499, | |
| "learning_rate": 9.289346733668343e-06, | |
| "loss": 0.1074, | |
| "step": 7575 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "grad_norm": 3.4752280712127686, | |
| "learning_rate": 9.286834170854272e-06, | |
| "loss": 0.1074, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "grad_norm": 3.5037472248077393, | |
| "learning_rate": 9.284321608040202e-06, | |
| "loss": 0.1114, | |
| "step": 7625 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "grad_norm": 3.3195717334747314, | |
| "learning_rate": 9.281809045226131e-06, | |
| "loss": 0.1111, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "grad_norm": 3.210256338119507, | |
| "learning_rate": 9.279296482412062e-06, | |
| "loss": 0.1095, | |
| "step": 7675 | |
| }, | |
| { | |
| "epoch": 5.17, | |
| "grad_norm": 3.4619410037994385, | |
| "learning_rate": 9.27678391959799e-06, | |
| "loss": 0.1118, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "grad_norm": 3.2132604122161865, | |
| "learning_rate": 9.27427135678392e-06, | |
| "loss": 0.1065, | |
| "step": 7725 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "grad_norm": 3.379657030105591, | |
| "learning_rate": 9.27175879396985e-06, | |
| "loss": 0.1121, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 5.22, | |
| "grad_norm": 3.6748008728027344, | |
| "learning_rate": 9.26924623115578e-06, | |
| "loss": 0.1062, | |
| "step": 7775 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "grad_norm": 3.063694715499878, | |
| "learning_rate": 9.26673366834171e-06, | |
| "loss": 0.1077, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 5.26, | |
| "grad_norm": 3.237032413482666, | |
| "learning_rate": 9.264221105527638e-06, | |
| "loss": 0.1079, | |
| "step": 7825 | |
| }, | |
| { | |
| "epoch": 5.27, | |
| "grad_norm": 3.2364072799682617, | |
| "learning_rate": 9.261708542713569e-06, | |
| "loss": 0.1075, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "grad_norm": 3.2186496257781982, | |
| "learning_rate": 9.259195979899498e-06, | |
| "loss": 0.107, | |
| "step": 7875 | |
| }, | |
| { | |
| "epoch": 5.31, | |
| "grad_norm": 3.249338150024414, | |
| "learning_rate": 9.256683417085428e-06, | |
| "loss": 0.1101, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "grad_norm": 2.9037253856658936, | |
| "learning_rate": 9.254170854271357e-06, | |
| "loss": 0.1049, | |
| "step": 7925 | |
| }, | |
| { | |
| "epoch": 5.34, | |
| "grad_norm": 3.467984914779663, | |
| "learning_rate": 9.251658291457288e-06, | |
| "loss": 0.1164, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "grad_norm": 3.047340154647827, | |
| "learning_rate": 9.249145728643217e-06, | |
| "loss": 0.1079, | |
| "step": 7975 | |
| }, | |
| { | |
| "epoch": 5.37, | |
| "grad_norm": 3.2782435417175293, | |
| "learning_rate": 9.246633165829147e-06, | |
| "loss": 0.1063, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 5.37, | |
| "eval_loss": 0.11793605983257294, | |
| "eval_runtime": 535.0689, | |
| "eval_samples_per_second": 2.59, | |
| "eval_steps_per_second": 2.59, | |
| "eval_wer": 27.19406674907293, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 5.39, | |
| "grad_norm": 3.045055389404297, | |
| "learning_rate": 9.244120603015076e-06, | |
| "loss": 0.1068, | |
| "step": 8025 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "grad_norm": 3.3729374408721924, | |
| "learning_rate": 9.241608040201005e-06, | |
| "loss": 0.1097, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "grad_norm": 3.581709861755371, | |
| "learning_rate": 9.239095477386936e-06, | |
| "loss": 0.109, | |
| "step": 8075 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "grad_norm": 3.690354585647583, | |
| "learning_rate": 9.236582914572864e-06, | |
| "loss": 0.1105, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 5.46, | |
| "grad_norm": 3.4395689964294434, | |
| "learning_rate": 9.234070351758795e-06, | |
| "loss": 0.1047, | |
| "step": 8125 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "grad_norm": 3.5904619693756104, | |
| "learning_rate": 9.231557788944724e-06, | |
| "loss": 0.1098, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "grad_norm": 3.4449338912963867, | |
| "learning_rate": 9.229045226130654e-06, | |
| "loss": 0.1094, | |
| "step": 8175 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "grad_norm": 3.081770181655884, | |
| "learning_rate": 9.226532663316585e-06, | |
| "loss": 0.1046, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "grad_norm": 3.2109663486480713, | |
| "learning_rate": 9.224020100502514e-06, | |
| "loss": 0.11, | |
| "step": 8225 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "grad_norm": 3.8002219200134277, | |
| "learning_rate": 9.221507537688443e-06, | |
| "loss": 0.1103, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "grad_norm": 3.423508644104004, | |
| "learning_rate": 9.218994974874373e-06, | |
| "loss": 0.1046, | |
| "step": 8275 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "grad_norm": 3.408816337585449, | |
| "learning_rate": 9.216482412060302e-06, | |
| "loss": 0.1137, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "grad_norm": 3.349015235900879, | |
| "learning_rate": 9.213969849246231e-06, | |
| "loss": 0.1063, | |
| "step": 8325 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "grad_norm": 3.255462169647217, | |
| "learning_rate": 9.211457286432162e-06, | |
| "loss": 0.1081, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "grad_norm": 3.0760374069213867, | |
| "learning_rate": 9.20894472361809e-06, | |
| "loss": 0.1124, | |
| "step": 8375 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "grad_norm": 3.469221830368042, | |
| "learning_rate": 9.206432160804021e-06, | |
| "loss": 0.1095, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "grad_norm": 3.20563006401062, | |
| "learning_rate": 9.20391959798995e-06, | |
| "loss": 0.1091, | |
| "step": 8425 | |
| }, | |
| { | |
| "epoch": 5.67, | |
| "grad_norm": 3.58202862739563, | |
| "learning_rate": 9.20140703517588e-06, | |
| "loss": 0.1084, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 5.69, | |
| "grad_norm": 3.2912611961364746, | |
| "learning_rate": 9.19889447236181e-06, | |
| "loss": 0.109, | |
| "step": 8475 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "grad_norm": 3.2603135108947754, | |
| "learning_rate": 9.19638190954774e-06, | |
| "loss": 0.1051, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 5.73, | |
| "grad_norm": 3.3398075103759766, | |
| "learning_rate": 9.19386934673367e-06, | |
| "loss": 0.1086, | |
| "step": 8525 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "grad_norm": 3.480815887451172, | |
| "learning_rate": 9.191356783919599e-06, | |
| "loss": 0.1059, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "grad_norm": 3.1898598670959473, | |
| "learning_rate": 9.188844221105528e-06, | |
| "loss": 0.104, | |
| "step": 8575 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "grad_norm": 3.3440845012664795, | |
| "learning_rate": 9.186331658291459e-06, | |
| "loss": 0.1126, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "grad_norm": 3.6762936115264893, | |
| "learning_rate": 9.183819095477388e-06, | |
| "loss": 0.1064, | |
| "step": 8625 | |
| }, | |
| { | |
| "epoch": 5.81, | |
| "grad_norm": 3.66489315032959, | |
| "learning_rate": 9.181306532663317e-06, | |
| "loss": 0.1096, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 5.83, | |
| "grad_norm": 3.3494789600372314, | |
| "learning_rate": 9.178793969849247e-06, | |
| "loss": 0.1111, | |
| "step": 8675 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "grad_norm": 3.4388012886047363, | |
| "learning_rate": 9.176281407035176e-06, | |
| "loss": 0.1107, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "grad_norm": 3.2451605796813965, | |
| "learning_rate": 9.173768844221105e-06, | |
| "loss": 0.1089, | |
| "step": 8725 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "grad_norm": 3.6606147289276123, | |
| "learning_rate": 9.171256281407036e-06, | |
| "loss": 0.1084, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "grad_norm": 3.2145121097564697, | |
| "learning_rate": 9.168743718592966e-06, | |
| "loss": 0.1063, | |
| "step": 8775 | |
| }, | |
| { | |
| "epoch": 5.91, | |
| "grad_norm": 3.6518869400024414, | |
| "learning_rate": 9.166231155778895e-06, | |
| "loss": 0.1043, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 5.93, | |
| "grad_norm": 2.9405784606933594, | |
| "learning_rate": 9.163718592964826e-06, | |
| "loss": 0.1117, | |
| "step": 8825 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "grad_norm": 3.5626678466796875, | |
| "learning_rate": 9.161206030150754e-06, | |
| "loss": 0.1048, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "grad_norm": 3.2351441383361816, | |
| "learning_rate": 9.158693467336685e-06, | |
| "loss": 0.1093, | |
| "step": 8875 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "grad_norm": 3.439530372619629, | |
| "learning_rate": 9.156180904522614e-06, | |
| "loss": 0.1073, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "grad_norm": 3.4655144214630127, | |
| "learning_rate": 9.153668341708543e-06, | |
| "loss": 0.1067, | |
| "step": 8925 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "grad_norm": 2.794478178024292, | |
| "learning_rate": 9.151155778894473e-06, | |
| "loss": 0.0995, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "grad_norm": 3.5291810035705566, | |
| "learning_rate": 9.148643216080402e-06, | |
| "loss": 0.0925, | |
| "step": 8975 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "grad_norm": 2.9376721382141113, | |
| "learning_rate": 9.146130653266331e-06, | |
| "loss": 0.0879, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "eval_loss": 0.1166120246052742, | |
| "eval_runtime": 531.7233, | |
| "eval_samples_per_second": 2.607, | |
| "eval_steps_per_second": 2.607, | |
| "eval_wer": 26.876214020837015, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 6.06, | |
| "grad_norm": 3.121159076690674, | |
| "learning_rate": 9.143618090452262e-06, | |
| "loss": 0.0925, | |
| "step": 9025 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "grad_norm": 3.190279722213745, | |
| "learning_rate": 9.141105527638192e-06, | |
| "loss": 0.0966, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 6.09, | |
| "grad_norm": 2.9551713466644287, | |
| "learning_rate": 9.138592964824121e-06, | |
| "loss": 0.0927, | |
| "step": 9075 | |
| }, | |
| { | |
| "epoch": 6.11, | |
| "grad_norm": 2.6916284561157227, | |
| "learning_rate": 9.136080402010052e-06, | |
| "loss": 0.0905, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "grad_norm": 3.1297528743743896, | |
| "learning_rate": 9.13356783919598e-06, | |
| "loss": 0.0966, | |
| "step": 9125 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "grad_norm": 3.3253660202026367, | |
| "learning_rate": 9.13105527638191e-06, | |
| "loss": 0.0977, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "grad_norm": 3.1732029914855957, | |
| "learning_rate": 9.12854271356784e-06, | |
| "loss": 0.0943, | |
| "step": 9175 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "grad_norm": 3.00846791267395, | |
| "learning_rate": 9.12603015075377e-06, | |
| "loss": 0.095, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "grad_norm": 3.4318153858184814, | |
| "learning_rate": 9.1235175879397e-06, | |
| "loss": 0.0918, | |
| "step": 9225 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "grad_norm": 2.615586519241333, | |
| "learning_rate": 9.121005025125628e-06, | |
| "loss": 0.0918, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 6.23, | |
| "grad_norm": 3.2654173374176025, | |
| "learning_rate": 9.118492462311559e-06, | |
| "loss": 0.0892, | |
| "step": 9275 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "grad_norm": 3.255948066711426, | |
| "learning_rate": 9.115979899497488e-06, | |
| "loss": 0.0955, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "grad_norm": 3.593632221221924, | |
| "learning_rate": 9.113467336683418e-06, | |
| "loss": 0.0951, | |
| "step": 9325 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "grad_norm": 3.3398244380950928, | |
| "learning_rate": 9.110954773869347e-06, | |
| "loss": 0.0926, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "grad_norm": 3.4789888858795166, | |
| "learning_rate": 9.108442211055278e-06, | |
| "loss": 0.0946, | |
| "step": 9375 | |
| }, | |
| { | |
| "epoch": 6.31, | |
| "grad_norm": 3.1585254669189453, | |
| "learning_rate": 9.105929648241206e-06, | |
| "loss": 0.0921, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "grad_norm": 3.3125743865966797, | |
| "learning_rate": 9.103417085427137e-06, | |
| "loss": 0.0912, | |
| "step": 9425 | |
| }, | |
| { | |
| "epoch": 6.35, | |
| "grad_norm": 2.899616241455078, | |
| "learning_rate": 9.100904522613066e-06, | |
| "loss": 0.0935, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "grad_norm": 2.9725539684295654, | |
| "learning_rate": 9.098391959798995e-06, | |
| "loss": 0.0934, | |
| "step": 9475 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "grad_norm": 3.340712070465088, | |
| "learning_rate": 9.095879396984926e-06, | |
| "loss": 0.0968, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "grad_norm": 3.4166252613067627, | |
| "learning_rate": 9.093366834170854e-06, | |
| "loss": 0.0902, | |
| "step": 9525 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "grad_norm": 3.42030930519104, | |
| "learning_rate": 9.090854271356785e-06, | |
| "loss": 0.0903, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 6.43, | |
| "grad_norm": 2.913060188293457, | |
| "learning_rate": 9.088341708542714e-06, | |
| "loss": 0.0967, | |
| "step": 9575 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "grad_norm": 2.9808599948883057, | |
| "learning_rate": 9.085829145728644e-06, | |
| "loss": 0.0898, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "grad_norm": 3.318812847137451, | |
| "learning_rate": 9.083316582914573e-06, | |
| "loss": 0.0964, | |
| "step": 9625 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "grad_norm": 2.8281571865081787, | |
| "learning_rate": 9.080804020100504e-06, | |
| "loss": 0.0925, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "grad_norm": 3.3148748874664307, | |
| "learning_rate": 9.078291457286433e-06, | |
| "loss": 0.0955, | |
| "step": 9675 | |
| }, | |
| { | |
| "epoch": 6.51, | |
| "grad_norm": 3.047445297241211, | |
| "learning_rate": 9.075778894472363e-06, | |
| "loss": 0.0907, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 6.53, | |
| "grad_norm": 3.201747417449951, | |
| "learning_rate": 9.073266331658292e-06, | |
| "loss": 0.0891, | |
| "step": 9725 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "grad_norm": 3.5526840686798096, | |
| "learning_rate": 9.070753768844221e-06, | |
| "loss": 0.0937, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "grad_norm": 3.3490021228790283, | |
| "learning_rate": 9.068241206030152e-06, | |
| "loss": 0.0924, | |
| "step": 9775 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "grad_norm": 3.195934534072876, | |
| "learning_rate": 9.06572864321608e-06, | |
| "loss": 0.0941, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "grad_norm": 3.1133546829223633, | |
| "learning_rate": 9.063216080402011e-06, | |
| "loss": 0.093, | |
| "step": 9825 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "grad_norm": 3.5979671478271484, | |
| "learning_rate": 9.06070351758794e-06, | |
| "loss": 0.0881, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 6.63, | |
| "grad_norm": 3.7291669845581055, | |
| "learning_rate": 9.05819095477387e-06, | |
| "loss": 0.0955, | |
| "step": 9875 | |
| }, | |
| { | |
| "epoch": 6.65, | |
| "grad_norm": 3.2835400104522705, | |
| "learning_rate": 9.0556783919598e-06, | |
| "loss": 0.0902, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "grad_norm": 3.1277029514312744, | |
| "learning_rate": 9.05316582914573e-06, | |
| "loss": 0.0936, | |
| "step": 9925 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "grad_norm": 3.2376766204833984, | |
| "learning_rate": 9.05065326633166e-06, | |
| "loss": 0.0896, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 6.7, | |
| "grad_norm": 2.698474168777466, | |
| "learning_rate": 9.048140703517589e-06, | |
| "loss": 0.0915, | |
| "step": 9975 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "grad_norm": 3.623647451400757, | |
| "learning_rate": 9.045628140703518e-06, | |
| "loss": 0.0924, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "eval_loss": 0.1171552762389183, | |
| "eval_runtime": 533.9147, | |
| "eval_samples_per_second": 2.596, | |
| "eval_steps_per_second": 2.596, | |
| "eval_wer": 26.558361292601095, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "grad_norm": 3.608774423599243, | |
| "learning_rate": 9.043115577889447e-06, | |
| "loss": 0.0927, | |
| "step": 10025 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "grad_norm": null, | |
| "learning_rate": 9.04070351758794e-06, | |
| "loss": 0.0952, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "grad_norm": 2.832880735397339, | |
| "learning_rate": 9.03819095477387e-06, | |
| "loss": 0.0909, | |
| "step": 10075 | |
| }, | |
| { | |
| "epoch": 6.78, | |
| "grad_norm": 3.0156736373901367, | |
| "learning_rate": 9.0356783919598e-06, | |
| "loss": 0.0944, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "grad_norm": 3.3390650749206543, | |
| "learning_rate": 9.033165829145728e-06, | |
| "loss": 0.0919, | |
| "step": 10125 | |
| }, | |
| { | |
| "epoch": 6.82, | |
| "grad_norm": 3.394937515258789, | |
| "learning_rate": 9.03065326633166e-06, | |
| "loss": 0.0932, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 6.83, | |
| "grad_norm": 3.443366765975952, | |
| "learning_rate": 9.028140703517589e-06, | |
| "loss": 0.0934, | |
| "step": 10175 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "grad_norm": 3.167790174484253, | |
| "learning_rate": 9.025628140703518e-06, | |
| "loss": 0.0934, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 6.87, | |
| "grad_norm": 3.151536464691162, | |
| "learning_rate": 9.023115577889447e-06, | |
| "loss": 0.0935, | |
| "step": 10225 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "grad_norm": 3.475541114807129, | |
| "learning_rate": 9.020603015075378e-06, | |
| "loss": 0.0924, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "grad_norm": 3.254150629043579, | |
| "learning_rate": 9.018090452261308e-06, | |
| "loss": 0.0946, | |
| "step": 10275 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "grad_norm": 3.126755714416504, | |
| "learning_rate": 9.015577889447237e-06, | |
| "loss": 0.0921, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "grad_norm": 3.1626737117767334, | |
| "learning_rate": 9.013065326633166e-06, | |
| "loss": 0.0908, | |
| "step": 10325 | |
| }, | |
| { | |
| "epoch": 6.95, | |
| "grad_norm": 3.488074779510498, | |
| "learning_rate": 9.010552763819096e-06, | |
| "loss": 0.0956, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 6.97, | |
| "grad_norm": 3.0085911750793457, | |
| "learning_rate": 9.008040201005027e-06, | |
| "loss": 0.0915, | |
| "step": 10375 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "grad_norm": 3.424804925918579, | |
| "learning_rate": 9.005527638190954e-06, | |
| "loss": 0.0968, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 3.1618521213531494, | |
| "learning_rate": 9.003015075376885e-06, | |
| "loss": 0.0911, | |
| "step": 10425 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "grad_norm": 3.355823040008545, | |
| "learning_rate": 9.000502512562815e-06, | |
| "loss": 0.0775, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 7.03, | |
| "grad_norm": 2.7716736793518066, | |
| "learning_rate": 8.997989949748744e-06, | |
| "loss": 0.0776, | |
| "step": 10475 | |
| }, | |
| { | |
| "epoch": 7.05, | |
| "grad_norm": 2.89070987701416, | |
| "learning_rate": 8.995477386934675e-06, | |
| "loss": 0.0803, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 7.07, | |
| "grad_norm": 3.0273945331573486, | |
| "learning_rate": 8.992964824120604e-06, | |
| "loss": 0.0731, | |
| "step": 10525 | |
| }, | |
| { | |
| "epoch": 7.09, | |
| "grad_norm": 2.902979612350464, | |
| "learning_rate": 8.990452261306534e-06, | |
| "loss": 0.0805, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "grad_norm": 2.9858810901641846, | |
| "learning_rate": 8.987939698492463e-06, | |
| "loss": 0.0761, | |
| "step": 10575 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "grad_norm": 2.780200958251953, | |
| "learning_rate": 8.985427135678392e-06, | |
| "loss": 0.0786, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "grad_norm": 3.0452048778533936, | |
| "learning_rate": 8.982914572864322e-06, | |
| "loss": 0.078, | |
| "step": 10625 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "grad_norm": 3.0429253578186035, | |
| "learning_rate": 8.980402010050253e-06, | |
| "loss": 0.078, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 7.17, | |
| "grad_norm": 2.758443593978882, | |
| "learning_rate": 8.977889447236182e-06, | |
| "loss": 0.0787, | |
| "step": 10675 | |
| }, | |
| { | |
| "epoch": 7.19, | |
| "grad_norm": 3.3259782791137695, | |
| "learning_rate": 8.975376884422111e-06, | |
| "loss": 0.0818, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "grad_norm": 3.1599812507629395, | |
| "learning_rate": 8.97286432160804e-06, | |
| "loss": 0.0788, | |
| "step": 10725 | |
| }, | |
| { | |
| "epoch": 7.22, | |
| "grad_norm": 3.163283348083496, | |
| "learning_rate": 8.97035175879397e-06, | |
| "loss": 0.0801, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 7.24, | |
| "grad_norm": 3.883058547973633, | |
| "learning_rate": 8.967839195979901e-06, | |
| "loss": 0.0818, | |
| "step": 10775 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "grad_norm": 3.0166139602661133, | |
| "learning_rate": 8.96532663316583e-06, | |
| "loss": 0.079, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 7.27, | |
| "grad_norm": 3.532127857208252, | |
| "learning_rate": 8.96281407035176e-06, | |
| "loss": 0.0764, | |
| "step": 10825 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "grad_norm": 2.8934993743896484, | |
| "learning_rate": 8.960301507537689e-06, | |
| "loss": 0.0791, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "grad_norm": 3.4274938106536865, | |
| "learning_rate": 8.957788944723618e-06, | |
| "loss": 0.0788, | |
| "step": 10875 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "grad_norm": 2.964526891708374, | |
| "learning_rate": 8.95527638190955e-06, | |
| "loss": 0.0781, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 7.34, | |
| "grad_norm": 3.1131231784820557, | |
| "learning_rate": 8.952763819095479e-06, | |
| "loss": 0.0776, | |
| "step": 10925 | |
| }, | |
| { | |
| "epoch": 7.35, | |
| "grad_norm": 2.757322072982788, | |
| "learning_rate": 8.950251256281408e-06, | |
| "loss": 0.0793, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "grad_norm": 2.8853962421417236, | |
| "learning_rate": 8.947738693467337e-06, | |
| "loss": 0.08, | |
| "step": 10975 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "grad_norm": 3.2388052940368652, | |
| "learning_rate": 8.945226130653267e-06, | |
| "loss": 0.0837, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "eval_loss": 0.11983851343393326, | |
| "eval_runtime": 541.1838, | |
| "eval_samples_per_second": 2.561, | |
| "eval_steps_per_second": 2.561, | |
| "eval_wer": 27.052798869856964, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "grad_norm": 3.431065559387207, | |
| "learning_rate": 8.942713567839196e-06, | |
| "loss": 0.0797, | |
| "step": 11025 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "grad_norm": 3.1514389514923096, | |
| "learning_rate": 8.940201005025127e-06, | |
| "loss": 0.0809, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "grad_norm": 3.1348989009857178, | |
| "learning_rate": 8.937688442211056e-06, | |
| "loss": 0.0796, | |
| "step": 11075 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "grad_norm": 3.4892783164978027, | |
| "learning_rate": 8.935175879396986e-06, | |
| "loss": 0.0813, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 7.47, | |
| "grad_norm": 3.3423171043395996, | |
| "learning_rate": 8.932663316582915e-06, | |
| "loss": 0.0768, | |
| "step": 11125 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "grad_norm": 3.119539499282837, | |
| "learning_rate": 8.930150753768844e-06, | |
| "loss": 0.0833, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 7.51, | |
| "grad_norm": 3.181475877761841, | |
| "learning_rate": 8.927638190954775e-06, | |
| "loss": 0.0803, | |
| "step": 11175 | |
| }, | |
| { | |
| "epoch": 7.52, | |
| "grad_norm": 3.3543057441711426, | |
| "learning_rate": 8.925125628140705e-06, | |
| "loss": 0.0806, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 7.54, | |
| "grad_norm": 3.1575417518615723, | |
| "learning_rate": 8.922613065326634e-06, | |
| "loss": 0.0812, | |
| "step": 11225 | |
| }, | |
| { | |
| "epoch": 7.56, | |
| "grad_norm": 3.0198452472686768, | |
| "learning_rate": 8.920100502512563e-06, | |
| "loss": 0.0805, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 7.57, | |
| "grad_norm": 2.9735798835754395, | |
| "learning_rate": 8.917587939698493e-06, | |
| "loss": 0.0791, | |
| "step": 11275 | |
| }, | |
| { | |
| "epoch": 7.59, | |
| "grad_norm": 3.363503932952881, | |
| "learning_rate": 8.915075376884424e-06, | |
| "loss": 0.0817, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 7.61, | |
| "grad_norm": 3.10579514503479, | |
| "learning_rate": 8.912562814070353e-06, | |
| "loss": 0.0833, | |
| "step": 11325 | |
| }, | |
| { | |
| "epoch": 7.62, | |
| "grad_norm": 3.5427165031433105, | |
| "learning_rate": 8.910050251256282e-06, | |
| "loss": 0.0827, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 7.64, | |
| "grad_norm": 2.9739034175872803, | |
| "learning_rate": 8.907537688442212e-06, | |
| "loss": 0.0795, | |
| "step": 11375 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "grad_norm": 3.0262250900268555, | |
| "learning_rate": 8.905025125628143e-06, | |
| "loss": 0.0777, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 7.67, | |
| "grad_norm": 2.9359376430511475, | |
| "learning_rate": 8.90251256281407e-06, | |
| "loss": 0.0807, | |
| "step": 11425 | |
| }, | |
| { | |
| "epoch": 7.69, | |
| "grad_norm": 3.158572196960449, | |
| "learning_rate": 8.900000000000001e-06, | |
| "loss": 0.0772, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "grad_norm": 3.330089807510376, | |
| "learning_rate": 8.89748743718593e-06, | |
| "loss": 0.0793, | |
| "step": 11475 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "grad_norm": 3.2174530029296875, | |
| "learning_rate": 8.89497487437186e-06, | |
| "loss": 0.079, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 7.74, | |
| "grad_norm": 3.673243522644043, | |
| "learning_rate": 8.892462311557791e-06, | |
| "loss": 0.0775, | |
| "step": 11525 | |
| }, | |
| { | |
| "epoch": 7.76, | |
| "grad_norm": 3.3094096183776855, | |
| "learning_rate": 8.889949748743718e-06, | |
| "loss": 0.078, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 7.77, | |
| "grad_norm": 3.426079273223877, | |
| "learning_rate": 8.88743718592965e-06, | |
| "loss": 0.0777, | |
| "step": 11575 | |
| }, | |
| { | |
| "epoch": 7.79, | |
| "grad_norm": 3.517086982727051, | |
| "learning_rate": 8.884924623115579e-06, | |
| "loss": 0.0776, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 7.81, | |
| "grad_norm": 2.9824516773223877, | |
| "learning_rate": 8.882412060301508e-06, | |
| "loss": 0.0805, | |
| "step": 11625 | |
| }, | |
| { | |
| "epoch": 7.82, | |
| "grad_norm": 2.965653896331787, | |
| "learning_rate": 8.879899497487437e-06, | |
| "loss": 0.0786, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 7.84, | |
| "grad_norm": 2.9882099628448486, | |
| "learning_rate": 8.877386934673368e-06, | |
| "loss": 0.0822, | |
| "step": 11675 | |
| }, | |
| { | |
| "epoch": 7.86, | |
| "grad_norm": 3.118823289871216, | |
| "learning_rate": 8.874874371859296e-06, | |
| "loss": 0.0773, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 7.87, | |
| "grad_norm": 4.2748188972473145, | |
| "learning_rate": 8.872361809045227e-06, | |
| "loss": 0.0812, | |
| "step": 11725 | |
| }, | |
| { | |
| "epoch": 7.89, | |
| "grad_norm": 3.5226612091064453, | |
| "learning_rate": 8.869849246231156e-06, | |
| "loss": 0.0801, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 7.91, | |
| "grad_norm": 3.2962095737457275, | |
| "learning_rate": 8.867336683417086e-06, | |
| "loss": 0.0779, | |
| "step": 11775 | |
| }, | |
| { | |
| "epoch": 7.92, | |
| "grad_norm": 3.037177801132202, | |
| "learning_rate": 8.864824120603017e-06, | |
| "loss": 0.0811, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "grad_norm": 3.207000255584717, | |
| "learning_rate": 8.862311557788944e-06, | |
| "loss": 0.0813, | |
| "step": 11825 | |
| }, | |
| { | |
| "epoch": 7.96, | |
| "grad_norm": 3.5045995712280273, | |
| "learning_rate": 8.859798994974875e-06, | |
| "loss": 0.0793, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "grad_norm": 2.9062917232513428, | |
| "learning_rate": 8.857286432160805e-06, | |
| "loss": 0.0808, | |
| "step": 11875 | |
| }, | |
| { | |
| "epoch": 7.99, | |
| "grad_norm": 3.086449146270752, | |
| "learning_rate": 8.854773869346734e-06, | |
| "loss": 0.0757, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 8.01, | |
| "grad_norm": 3.4503021240234375, | |
| "learning_rate": 8.852261306532665e-06, | |
| "loss": 0.0698, | |
| "step": 11925 | |
| }, | |
| { | |
| "epoch": 8.03, | |
| "grad_norm": 2.755633592605591, | |
| "learning_rate": 8.849748743718594e-06, | |
| "loss": 0.0619, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "grad_norm": 3.3875789642333984, | |
| "learning_rate": 8.847236180904524e-06, | |
| "loss": 0.0651, | |
| "step": 11975 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "grad_norm": 2.697042465209961, | |
| "learning_rate": 8.844723618090453e-06, | |
| "loss": 0.0654, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "eval_loss": 0.12158209830522537, | |
| "eval_runtime": 532.8467, | |
| "eval_samples_per_second": 2.601, | |
| "eval_steps_per_second": 2.601, | |
| "eval_wer": 26.328800988875155, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 8.08, | |
| "grad_norm": 2.8202855587005615, | |
| "learning_rate": 8.842211055276382e-06, | |
| "loss": 0.0658, | |
| "step": 12025 | |
| }, | |
| { | |
| "epoch": 8.09, | |
| "grad_norm": 2.7945172786712646, | |
| "learning_rate": 8.839698492462312e-06, | |
| "loss": 0.0627, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 8.11, | |
| "grad_norm": 3.1584692001342773, | |
| "learning_rate": 8.837185929648243e-06, | |
| "loss": 0.0673, | |
| "step": 12075 | |
| }, | |
| { | |
| "epoch": 8.13, | |
| "grad_norm": 3.1642470359802246, | |
| "learning_rate": 8.83467336683417e-06, | |
| "loss": 0.0683, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 8.14, | |
| "grad_norm": 2.9188601970672607, | |
| "learning_rate": 8.832160804020101e-06, | |
| "loss": 0.0682, | |
| "step": 12125 | |
| }, | |
| { | |
| "epoch": 8.16, | |
| "grad_norm": 3.276679039001465, | |
| "learning_rate": 8.829748743718593e-06, | |
| "loss": 0.0656, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 8.18, | |
| "grad_norm": 2.683711051940918, | |
| "learning_rate": 8.827236180904524e-06, | |
| "loss": 0.0625, | |
| "step": 12175 | |
| }, | |
| { | |
| "epoch": 8.19, | |
| "grad_norm": 3.232003688812256, | |
| "learning_rate": 8.824723618090453e-06, | |
| "loss": 0.066, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 8.21, | |
| "grad_norm": 2.7374961376190186, | |
| "learning_rate": 8.822211055276383e-06, | |
| "loss": 0.0647, | |
| "step": 12225 | |
| }, | |
| { | |
| "epoch": 8.23, | |
| "grad_norm": 3.423482656478882, | |
| "learning_rate": 8.819698492462312e-06, | |
| "loss": 0.0673, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 8.24, | |
| "grad_norm": 2.9813687801361084, | |
| "learning_rate": 8.817185929648241e-06, | |
| "loss": 0.0685, | |
| "step": 12275 | |
| }, | |
| { | |
| "epoch": 8.26, | |
| "grad_norm": 3.047753095626831, | |
| "learning_rate": 8.81467336683417e-06, | |
| "loss": 0.0658, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 8.28, | |
| "grad_norm": 3.4329652786254883, | |
| "learning_rate": 8.812160804020102e-06, | |
| "loss": 0.0662, | |
| "step": 12325 | |
| }, | |
| { | |
| "epoch": 8.29, | |
| "grad_norm": 3.080573081970215, | |
| "learning_rate": 8.809648241206031e-06, | |
| "loss": 0.0674, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 8.31, | |
| "grad_norm": 2.828704833984375, | |
| "learning_rate": 8.80713567839196e-06, | |
| "loss": 0.0694, | |
| "step": 12375 | |
| }, | |
| { | |
| "epoch": 8.33, | |
| "grad_norm": 3.132976531982422, | |
| "learning_rate": 8.804623115577891e-06, | |
| "loss": 0.0685, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 8.34, | |
| "grad_norm": 3.154456615447998, | |
| "learning_rate": 8.802110552763819e-06, | |
| "loss": 0.0679, | |
| "step": 12425 | |
| }, | |
| { | |
| "epoch": 8.36, | |
| "grad_norm": 3.4193313121795654, | |
| "learning_rate": 8.79959798994975e-06, | |
| "loss": 0.0674, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 8.38, | |
| "grad_norm": 3.2318356037139893, | |
| "learning_rate": 8.79708542713568e-06, | |
| "loss": 0.0658, | |
| "step": 12475 | |
| }, | |
| { | |
| "epoch": 8.39, | |
| "grad_norm": 2.9559836387634277, | |
| "learning_rate": 8.794572864321609e-06, | |
| "loss": 0.0647, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 8.41, | |
| "grad_norm": 3.459628105163574, | |
| "learning_rate": 8.792060301507538e-06, | |
| "loss": 0.0693, | |
| "step": 12525 | |
| }, | |
| { | |
| "epoch": 8.43, | |
| "grad_norm": 3.2934398651123047, | |
| "learning_rate": 8.789547738693467e-06, | |
| "loss": 0.0696, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 8.45, | |
| "grad_norm": 3.2100000381469727, | |
| "learning_rate": 8.787035175879398e-06, | |
| "loss": 0.0703, | |
| "step": 12575 | |
| }, | |
| { | |
| "epoch": 8.46, | |
| "grad_norm": 3.280884265899658, | |
| "learning_rate": 8.784522613065328e-06, | |
| "loss": 0.0661, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 8.48, | |
| "grad_norm": 3.1474897861480713, | |
| "learning_rate": 8.782010050251257e-06, | |
| "loss": 0.0663, | |
| "step": 12625 | |
| }, | |
| { | |
| "epoch": 8.5, | |
| "grad_norm": 2.9876487255096436, | |
| "learning_rate": 8.779497487437186e-06, | |
| "loss": 0.0693, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 8.51, | |
| "grad_norm": 3.278313159942627, | |
| "learning_rate": 8.776984924623117e-06, | |
| "loss": 0.0699, | |
| "step": 12675 | |
| }, | |
| { | |
| "epoch": 8.53, | |
| "grad_norm": 3.023169755935669, | |
| "learning_rate": 8.774472361809045e-06, | |
| "loss": 0.0712, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 8.55, | |
| "grad_norm": 3.168148994445801, | |
| "learning_rate": 8.771959798994976e-06, | |
| "loss": 0.0698, | |
| "step": 12725 | |
| }, | |
| { | |
| "epoch": 8.56, | |
| "grad_norm": 3.177262544631958, | |
| "learning_rate": 8.769447236180905e-06, | |
| "loss": 0.0686, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 8.58, | |
| "grad_norm": 3.1487865447998047, | |
| "learning_rate": 8.766934673366834e-06, | |
| "loss": 0.0684, | |
| "step": 12775 | |
| }, | |
| { | |
| "epoch": 8.6, | |
| "grad_norm": 2.9590165615081787, | |
| "learning_rate": 8.764422110552765e-06, | |
| "loss": 0.0691, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 8.61, | |
| "grad_norm": 3.0423812866210938, | |
| "learning_rate": 8.761909547738693e-06, | |
| "loss": 0.0682, | |
| "step": 12825 | |
| }, | |
| { | |
| "epoch": 8.63, | |
| "grad_norm": 3.3768019676208496, | |
| "learning_rate": 8.759396984924624e-06, | |
| "loss": 0.0709, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 8.65, | |
| "grad_norm": 3.7296512126922607, | |
| "learning_rate": 8.756884422110553e-06, | |
| "loss": 0.0701, | |
| "step": 12875 | |
| }, | |
| { | |
| "epoch": 8.66, | |
| "grad_norm": 3.148634433746338, | |
| "learning_rate": 8.754371859296483e-06, | |
| "loss": 0.0634, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 8.68, | |
| "grad_norm": 2.908444881439209, | |
| "learning_rate": 8.751859296482412e-06, | |
| "loss": 0.0659, | |
| "step": 12925 | |
| }, | |
| { | |
| "epoch": 8.7, | |
| "grad_norm": 3.3164865970611572, | |
| "learning_rate": 8.749346733668343e-06, | |
| "loss": 0.0655, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 8.71, | |
| "grad_norm": 2.9725685119628906, | |
| "learning_rate": 8.746834170854272e-06, | |
| "loss": 0.0659, | |
| "step": 12975 | |
| }, | |
| { | |
| "epoch": 8.73, | |
| "grad_norm": 3.171374797821045, | |
| "learning_rate": 8.744321608040202e-06, | |
| "loss": 0.068, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 8.73, | |
| "eval_loss": 0.12423743307590485, | |
| "eval_runtime": 533.8353, | |
| "eval_samples_per_second": 2.596, | |
| "eval_steps_per_second": 2.596, | |
| "eval_wer": 26.86738477838602, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "grad_norm": 3.3160324096679688, | |
| "learning_rate": 8.741809045226131e-06, | |
| "loss": 0.0692, | |
| "step": 13025 | |
| }, | |
| { | |
| "epoch": 8.76, | |
| "grad_norm": 3.2802672386169434, | |
| "learning_rate": 8.73929648241206e-06, | |
| "loss": 0.067, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 8.78, | |
| "grad_norm": 3.2849535942077637, | |
| "learning_rate": 8.736783919597991e-06, | |
| "loss": 0.0654, | |
| "step": 13075 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "grad_norm": 3.685974359512329, | |
| "learning_rate": 8.734271356783919e-06, | |
| "loss": 0.0687, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 8.81, | |
| "grad_norm": 2.9581081867218018, | |
| "learning_rate": 8.73175879396985e-06, | |
| "loss": 0.0658, | |
| "step": 13125 | |
| }, | |
| { | |
| "epoch": 8.83, | |
| "grad_norm": 3.3408470153808594, | |
| "learning_rate": 8.72924623115578e-06, | |
| "loss": 0.0726, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 8.85, | |
| "grad_norm": 3.5375308990478516, | |
| "learning_rate": 8.726733668341709e-06, | |
| "loss": 0.0688, | |
| "step": 13175 | |
| }, | |
| { | |
| "epoch": 8.87, | |
| "grad_norm": 2.7572827339172363, | |
| "learning_rate": 8.72422110552764e-06, | |
| "loss": 0.0688, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 8.88, | |
| "grad_norm": 3.0948410034179688, | |
| "learning_rate": 8.721708542713569e-06, | |
| "loss": 0.0686, | |
| "step": 13225 | |
| }, | |
| { | |
| "epoch": 8.9, | |
| "grad_norm": 3.076904773712158, | |
| "learning_rate": 8.719195979899498e-06, | |
| "loss": 0.0683, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 8.92, | |
| "grad_norm": 3.060412645339966, | |
| "learning_rate": 8.716683417085428e-06, | |
| "loss": 0.0692, | |
| "step": 13275 | |
| }, | |
| { | |
| "epoch": 8.93, | |
| "grad_norm": 3.1852357387542725, | |
| "learning_rate": 8.714170854271357e-06, | |
| "loss": 0.0647, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 8.95, | |
| "grad_norm": 3.427971601486206, | |
| "learning_rate": 8.711658291457286e-06, | |
| "loss": 0.0675, | |
| "step": 13325 | |
| }, | |
| { | |
| "epoch": 8.97, | |
| "grad_norm": 3.221360683441162, | |
| "learning_rate": 8.709145728643217e-06, | |
| "loss": 0.0702, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 8.98, | |
| "grad_norm": 3.490898847579956, | |
| "learning_rate": 8.706633165829147e-06, | |
| "loss": 0.0693, | |
| "step": 13375 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 3.1776282787323, | |
| "learning_rate": 8.704120603015076e-06, | |
| "loss": 0.0725, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "grad_norm": 2.52174973487854, | |
| "learning_rate": 8.701608040201005e-06, | |
| "loss": 0.0542, | |
| "step": 13425 | |
| }, | |
| { | |
| "epoch": 9.03, | |
| "grad_norm": 2.8436169624328613, | |
| "learning_rate": 8.699095477386935e-06, | |
| "loss": 0.0543, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 9.05, | |
| "grad_norm": 3.0883164405822754, | |
| "learning_rate": 8.696582914572866e-06, | |
| "loss": 0.0565, | |
| "step": 13475 | |
| }, | |
| { | |
| "epoch": 9.07, | |
| "grad_norm": 3.2945592403411865, | |
| "learning_rate": 8.694070351758795e-06, | |
| "loss": 0.0554, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 9.08, | |
| "grad_norm": 3.1277835369110107, | |
| "learning_rate": 8.691557788944724e-06, | |
| "loss": 0.0575, | |
| "step": 13525 | |
| }, | |
| { | |
| "epoch": 9.1, | |
| "grad_norm": 2.555258274078369, | |
| "learning_rate": 8.689045226130654e-06, | |
| "loss": 0.0557, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 9.12, | |
| "grad_norm": 2.6981780529022217, | |
| "learning_rate": 8.686532663316583e-06, | |
| "loss": 0.056, | |
| "step": 13575 | |
| }, | |
| { | |
| "epoch": 9.13, | |
| "grad_norm": 2.9988884925842285, | |
| "learning_rate": 8.684020100502514e-06, | |
| "loss": 0.0575, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 9.15, | |
| "grad_norm": 2.7814390659332275, | |
| "learning_rate": 8.681507537688443e-06, | |
| "loss": 0.0543, | |
| "step": 13625 | |
| }, | |
| { | |
| "epoch": 9.17, | |
| "grad_norm": 2.8165695667266846, | |
| "learning_rate": 8.678994974874373e-06, | |
| "loss": 0.0542, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 9.18, | |
| "grad_norm": 2.8924388885498047, | |
| "learning_rate": 8.676482412060302e-06, | |
| "loss": 0.0584, | |
| "step": 13675 | |
| }, | |
| { | |
| "epoch": 9.2, | |
| "grad_norm": 2.8846709728240967, | |
| "learning_rate": 8.673969849246231e-06, | |
| "loss": 0.0546, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 9.22, | |
| "grad_norm": 3.0931618213653564, | |
| "learning_rate": 8.67145728643216e-06, | |
| "loss": 0.0541, | |
| "step": 13725 | |
| }, | |
| { | |
| "epoch": 9.23, | |
| "grad_norm": 3.0044896602630615, | |
| "learning_rate": 8.668944723618092e-06, | |
| "loss": 0.0566, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 9.25, | |
| "grad_norm": 2.992866039276123, | |
| "learning_rate": 8.666432160804021e-06, | |
| "loss": 0.0568, | |
| "step": 13775 | |
| }, | |
| { | |
| "epoch": 9.27, | |
| "grad_norm": 3.3243565559387207, | |
| "learning_rate": 8.66391959798995e-06, | |
| "loss": 0.0575, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 9.28, | |
| "grad_norm": 3.164736747741699, | |
| "learning_rate": 8.661407035175881e-06, | |
| "loss": 0.0565, | |
| "step": 13825 | |
| }, | |
| { | |
| "epoch": 9.3, | |
| "grad_norm": 2.89432430267334, | |
| "learning_rate": 8.658894472361809e-06, | |
| "loss": 0.0571, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 9.32, | |
| "grad_norm": 3.053514242172241, | |
| "learning_rate": 8.65638190954774e-06, | |
| "loss": 0.0582, | |
| "step": 13875 | |
| }, | |
| { | |
| "epoch": 9.34, | |
| "grad_norm": 2.7615840435028076, | |
| "learning_rate": 8.65386934673367e-06, | |
| "loss": 0.0566, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 9.35, | |
| "grad_norm": 3.1976537704467773, | |
| "learning_rate": 8.651356783919599e-06, | |
| "loss": 0.0578, | |
| "step": 13925 | |
| }, | |
| { | |
| "epoch": 9.37, | |
| "grad_norm": 3.1072587966918945, | |
| "learning_rate": 8.648844221105528e-06, | |
| "loss": 0.0577, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 9.39, | |
| "grad_norm": 3.4911906719207764, | |
| "learning_rate": 8.646331658291457e-06, | |
| "loss": 0.0548, | |
| "step": 13975 | |
| }, | |
| { | |
| "epoch": 9.4, | |
| "grad_norm": 2.923501968383789, | |
| "learning_rate": 8.643819095477388e-06, | |
| "loss": 0.0586, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 9.4, | |
| "eval_loss": 0.1282009482383728, | |
| "eval_runtime": 533.8178, | |
| "eval_samples_per_second": 2.596, | |
| "eval_steps_per_second": 2.596, | |
| "eval_wer": 26.982164930248985, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 9.42, | |
| "grad_norm": 3.0205700397491455, | |
| "learning_rate": 8.641306532663318e-06, | |
| "loss": 0.0552, | |
| "step": 14025 | |
| }, | |
| { | |
| "epoch": 9.44, | |
| "grad_norm": 3.022747278213501, | |
| "learning_rate": 8.638793969849247e-06, | |
| "loss": 0.0574, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 9.45, | |
| "grad_norm": 3.2978105545043945, | |
| "learning_rate": 8.636281407035176e-06, | |
| "loss": 0.0571, | |
| "step": 14075 | |
| }, | |
| { | |
| "epoch": 9.47, | |
| "grad_norm": 3.0741355419158936, | |
| "learning_rate": 8.633768844221107e-06, | |
| "loss": 0.0556, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 9.49, | |
| "grad_norm": 2.8877174854278564, | |
| "learning_rate": 8.631256281407035e-06, | |
| "loss": 0.0538, | |
| "step": 14125 | |
| }, | |
| { | |
| "epoch": 9.5, | |
| "grad_norm": 3.618729591369629, | |
| "learning_rate": 8.628743718592966e-06, | |
| "loss": 0.0592, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 9.52, | |
| "grad_norm": 3.005646228790283, | |
| "learning_rate": 8.626231155778895e-06, | |
| "loss": 0.057, | |
| "step": 14175 | |
| }, | |
| { | |
| "epoch": 9.54, | |
| "grad_norm": 3.3048083782196045, | |
| "learning_rate": 8.623718592964825e-06, | |
| "loss": 0.0564, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 9.55, | |
| "grad_norm": 3.2562224864959717, | |
| "learning_rate": 8.621206030150756e-06, | |
| "loss": 0.0571, | |
| "step": 14225 | |
| }, | |
| { | |
| "epoch": 9.57, | |
| "grad_norm": 2.980013608932495, | |
| "learning_rate": 8.618693467336683e-06, | |
| "loss": 0.0564, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 9.59, | |
| "grad_norm": 3.220036745071411, | |
| "learning_rate": 8.616180904522614e-06, | |
| "loss": 0.0588, | |
| "step": 14275 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "grad_norm": 3.4643850326538086, | |
| "learning_rate": 8.613668341708544e-06, | |
| "loss": 0.0565, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 9.62, | |
| "grad_norm": 3.2021632194519043, | |
| "learning_rate": 8.611155778894473e-06, | |
| "loss": 0.0586, | |
| "step": 14325 | |
| }, | |
| { | |
| "epoch": 9.64, | |
| "grad_norm": 3.2279539108276367, | |
| "learning_rate": 8.608643216080402e-06, | |
| "loss": 0.0562, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 9.65, | |
| "grad_norm": 3.429431438446045, | |
| "learning_rate": 8.606130653266333e-06, | |
| "loss": 0.0585, | |
| "step": 14375 | |
| }, | |
| { | |
| "epoch": 9.67, | |
| "grad_norm": 3.278526544570923, | |
| "learning_rate": 8.60361809045226e-06, | |
| "loss": 0.0584, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 9.69, | |
| "grad_norm": 3.5569005012512207, | |
| "learning_rate": 8.601105527638192e-06, | |
| "loss": 0.0587, | |
| "step": 14425 | |
| }, | |
| { | |
| "epoch": 9.7, | |
| "grad_norm": 3.0540413856506348, | |
| "learning_rate": 8.598592964824121e-06, | |
| "loss": 0.0582, | |
| "step": 14450 | |
| }, | |
| { | |
| "epoch": 9.72, | |
| "grad_norm": 2.9771244525909424, | |
| "learning_rate": 8.59608040201005e-06, | |
| "loss": 0.0544, | |
| "step": 14475 | |
| }, | |
| { | |
| "epoch": 9.74, | |
| "grad_norm": 3.271925926208496, | |
| "learning_rate": 8.593567839195981e-06, | |
| "loss": 0.0556, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 9.75, | |
| "grad_norm": 3.2107813358306885, | |
| "learning_rate": 8.591055276381909e-06, | |
| "loss": 0.0556, | |
| "step": 14525 | |
| }, | |
| { | |
| "epoch": 9.77, | |
| "grad_norm": 2.9411368370056152, | |
| "learning_rate": 8.58854271356784e-06, | |
| "loss": 0.06, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 9.79, | |
| "grad_norm": 2.9419991970062256, | |
| "learning_rate": 8.58603015075377e-06, | |
| "loss": 0.055, | |
| "step": 14575 | |
| }, | |
| { | |
| "epoch": 9.81, | |
| "grad_norm": 3.3104031085968018, | |
| "learning_rate": 8.583517587939699e-06, | |
| "loss": 0.0586, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 9.82, | |
| "grad_norm": 3.488868236541748, | |
| "learning_rate": 8.58100502512563e-06, | |
| "loss": 0.0608, | |
| "step": 14625 | |
| }, | |
| { | |
| "epoch": 9.84, | |
| "grad_norm": 2.7537827491760254, | |
| "learning_rate": 8.578492462311559e-06, | |
| "loss": 0.061, | |
| "step": 14650 | |
| }, | |
| { | |
| "epoch": 9.86, | |
| "grad_norm": 2.967761278152466, | |
| "learning_rate": 8.575979899497488e-06, | |
| "loss": 0.0616, | |
| "step": 14675 | |
| }, | |
| { | |
| "epoch": 9.87, | |
| "grad_norm": 2.6756021976470947, | |
| "learning_rate": 8.573467336683418e-06, | |
| "loss": 0.0572, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 9.89, | |
| "grad_norm": 3.6669530868530273, | |
| "learning_rate": 8.570954773869347e-06, | |
| "loss": 0.0545, | |
| "step": 14725 | |
| }, | |
| { | |
| "epoch": 9.91, | |
| "grad_norm": 3.402998208999634, | |
| "learning_rate": 8.568442211055276e-06, | |
| "loss": 0.0595, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 9.92, | |
| "grad_norm": 3.397134304046631, | |
| "learning_rate": 8.565929648241207e-06, | |
| "loss": 0.0582, | |
| "step": 14775 | |
| }, | |
| { | |
| "epoch": 9.94, | |
| "grad_norm": 3.193824291229248, | |
| "learning_rate": 8.563417085427135e-06, | |
| "loss": 0.0558, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 9.96, | |
| "grad_norm": 3.0948803424835205, | |
| "learning_rate": 8.560904522613066e-06, | |
| "loss": 0.0572, | |
| "step": 14825 | |
| }, | |
| { | |
| "epoch": 9.97, | |
| "grad_norm": 3.6509146690368652, | |
| "learning_rate": 8.558391959798995e-06, | |
| "loss": 0.0595, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 9.99, | |
| "grad_norm": 3.0662288665771484, | |
| "learning_rate": 8.555879396984925e-06, | |
| "loss": 0.057, | |
| "step": 14875 | |
| }, | |
| { | |
| "epoch": 10.01, | |
| "grad_norm": 2.2760088443756104, | |
| "learning_rate": 8.553366834170856e-06, | |
| "loss": 0.0524, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 10.02, | |
| "grad_norm": 2.8303427696228027, | |
| "learning_rate": 8.550854271356785e-06, | |
| "loss": 0.0494, | |
| "step": 14925 | |
| }, | |
| { | |
| "epoch": 10.04, | |
| "grad_norm": 3.1542868614196777, | |
| "learning_rate": 8.548341708542714e-06, | |
| "loss": 0.0445, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 10.06, | |
| "grad_norm": 2.8265697956085205, | |
| "learning_rate": 8.545829145728644e-06, | |
| "loss": 0.0464, | |
| "step": 14975 | |
| }, | |
| { | |
| "epoch": 10.07, | |
| "grad_norm": 3.163896322250366, | |
| "learning_rate": 8.543316582914573e-06, | |
| "loss": 0.047, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 10.07, | |
| "eval_loss": 0.13359740376472473, | |
| "eval_runtime": 533.7428, | |
| "eval_samples_per_second": 2.597, | |
| "eval_steps_per_second": 2.597, | |
| "eval_wer": 27.405968567896878, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 10.09, | |
| "grad_norm": 2.813354253768921, | |
| "learning_rate": 8.540804020100502e-06, | |
| "loss": 0.0476, | |
| "step": 15025 | |
| }, | |
| { | |
| "epoch": 10.11, | |
| "grad_norm": 2.448727607727051, | |
| "learning_rate": 8.538291457286433e-06, | |
| "loss": 0.0448, | |
| "step": 15050 | |
| }, | |
| { | |
| "epoch": 10.12, | |
| "grad_norm": 2.798645257949829, | |
| "learning_rate": 8.535778894472363e-06, | |
| "loss": 0.0458, | |
| "step": 15075 | |
| }, | |
| { | |
| "epoch": 10.14, | |
| "grad_norm": 2.969273090362549, | |
| "learning_rate": 8.533266331658292e-06, | |
| "loss": 0.0442, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 10.16, | |
| "grad_norm": 2.901127576828003, | |
| "learning_rate": 8.530753768844221e-06, | |
| "loss": 0.0431, | |
| "step": 15125 | |
| }, | |
| { | |
| "epoch": 10.17, | |
| "grad_norm": 3.0042836666107178, | |
| "learning_rate": 8.52824120603015e-06, | |
| "loss": 0.049, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 10.19, | |
| "grad_norm": 2.694744825363159, | |
| "learning_rate": 8.525728643216082e-06, | |
| "loss": 0.0474, | |
| "step": 15175 | |
| }, | |
| { | |
| "epoch": 10.21, | |
| "grad_norm": 2.79301118850708, | |
| "learning_rate": 8.523216080402011e-06, | |
| "loss": 0.0459, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 10.22, | |
| "grad_norm": 3.328848123550415, | |
| "learning_rate": 8.52070351758794e-06, | |
| "loss": 0.0481, | |
| "step": 15225 | |
| }, | |
| { | |
| "epoch": 10.24, | |
| "grad_norm": 3.0490903854370117, | |
| "learning_rate": 8.518190954773871e-06, | |
| "loss": 0.0467, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 10.26, | |
| "grad_norm": 2.891860246658325, | |
| "learning_rate": 8.515678391959799e-06, | |
| "loss": 0.0482, | |
| "step": 15275 | |
| }, | |
| { | |
| "epoch": 10.28, | |
| "grad_norm": 3.29339599609375, | |
| "learning_rate": 8.51316582914573e-06, | |
| "loss": 0.0468, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 10.29, | |
| "grad_norm": 2.871262550354004, | |
| "learning_rate": 8.51065326633166e-06, | |
| "loss": 0.0465, | |
| "step": 15325 | |
| }, | |
| { | |
| "epoch": 10.31, | |
| "grad_norm": 2.673008680343628, | |
| "learning_rate": 8.508140703517589e-06, | |
| "loss": 0.0457, | |
| "step": 15350 | |
| }, | |
| { | |
| "epoch": 10.33, | |
| "grad_norm": 2.5940115451812744, | |
| "learning_rate": 8.505628140703518e-06, | |
| "loss": 0.049, | |
| "step": 15375 | |
| }, | |
| { | |
| "epoch": 10.34, | |
| "grad_norm": 2.8226072788238525, | |
| "learning_rate": 8.503115577889447e-06, | |
| "loss": 0.0472, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 10.36, | |
| "grad_norm": 2.800179958343506, | |
| "learning_rate": 8.500603015075377e-06, | |
| "loss": 0.0477, | |
| "step": 15425 | |
| }, | |
| { | |
| "epoch": 10.38, | |
| "grad_norm": 3.0697898864746094, | |
| "learning_rate": 8.498090452261308e-06, | |
| "loss": 0.0448, | |
| "step": 15450 | |
| }, | |
| { | |
| "epoch": 10.39, | |
| "grad_norm": 2.9394161701202393, | |
| "learning_rate": 8.495577889447237e-06, | |
| "loss": 0.0464, | |
| "step": 15475 | |
| }, | |
| { | |
| "epoch": 10.41, | |
| "grad_norm": 3.055058479309082, | |
| "learning_rate": 8.493065326633166e-06, | |
| "loss": 0.0486, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 10.43, | |
| "grad_norm": 3.4436676502227783, | |
| "learning_rate": 8.490552763819097e-06, | |
| "loss": 0.0479, | |
| "step": 15525 | |
| }, | |
| { | |
| "epoch": 10.44, | |
| "grad_norm": 3.167590379714966, | |
| "learning_rate": 8.488040201005025e-06, | |
| "loss": 0.049, | |
| "step": 15550 | |
| }, | |
| { | |
| "epoch": 10.46, | |
| "grad_norm": 2.786879539489746, | |
| "learning_rate": 8.485527638190956e-06, | |
| "loss": 0.0476, | |
| "step": 15575 | |
| }, | |
| { | |
| "epoch": 10.48, | |
| "grad_norm": 3.0949158668518066, | |
| "learning_rate": 8.483015075376885e-06, | |
| "loss": 0.0463, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 10.49, | |
| "grad_norm": 3.426304340362549, | |
| "learning_rate": 8.480502512562815e-06, | |
| "loss": 0.0475, | |
| "step": 15625 | |
| }, | |
| { | |
| "epoch": 10.51, | |
| "grad_norm": 3.1173408031463623, | |
| "learning_rate": 8.477989949748744e-06, | |
| "loss": 0.0476, | |
| "step": 15650 | |
| }, | |
| { | |
| "epoch": 10.53, | |
| "grad_norm": 2.856600046157837, | |
| "learning_rate": 8.475477386934673e-06, | |
| "loss": 0.0471, | |
| "step": 15675 | |
| }, | |
| { | |
| "epoch": 10.54, | |
| "grad_norm": 3.2512564659118652, | |
| "learning_rate": 8.472964824120604e-06, | |
| "loss": 0.0483, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 10.56, | |
| "grad_norm": 3.3549506664276123, | |
| "learning_rate": 8.470452261306534e-06, | |
| "loss": 0.0462, | |
| "step": 15725 | |
| }, | |
| { | |
| "epoch": 10.58, | |
| "grad_norm": 2.7729334831237793, | |
| "learning_rate": 8.467939698492463e-06, | |
| "loss": 0.0472, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 10.59, | |
| "grad_norm": 2.711257219314575, | |
| "learning_rate": 8.465427135678392e-06, | |
| "loss": 0.0472, | |
| "step": 15775 | |
| }, | |
| { | |
| "epoch": 10.61, | |
| "grad_norm": 3.229771375656128, | |
| "learning_rate": 8.462914572864323e-06, | |
| "loss": 0.0479, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 10.63, | |
| "grad_norm": 3.0402400493621826, | |
| "learning_rate": 8.460402010050251e-06, | |
| "loss": 0.0503, | |
| "step": 15825 | |
| }, | |
| { | |
| "epoch": 10.64, | |
| "grad_norm": 2.9210867881774902, | |
| "learning_rate": 8.457989949748744e-06, | |
| "loss": 0.0497, | |
| "step": 15850 | |
| }, | |
| { | |
| "epoch": 10.66, | |
| "grad_norm": 3.3483831882476807, | |
| "learning_rate": 8.455577889447237e-06, | |
| "loss": 0.0475, | |
| "step": 15875 | |
| }, | |
| { | |
| "epoch": 10.68, | |
| "grad_norm": 3.053593873977661, | |
| "learning_rate": 8.453065326633167e-06, | |
| "loss": 0.046, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 10.7, | |
| "grad_norm": 3.136958599090576, | |
| "learning_rate": 8.450552763819096e-06, | |
| "loss": 0.0509, | |
| "step": 15925 | |
| }, | |
| { | |
| "epoch": 10.71, | |
| "grad_norm": 3.1040425300598145, | |
| "learning_rate": 8.448040201005025e-06, | |
| "loss": 0.0504, | |
| "step": 15950 | |
| }, | |
| { | |
| "epoch": 10.73, | |
| "grad_norm": 2.8489692211151123, | |
| "learning_rate": 8.445527638190956e-06, | |
| "loss": 0.0484, | |
| "step": 15975 | |
| }, | |
| { | |
| "epoch": 10.75, | |
| "grad_norm": 2.8868560791015625, | |
| "learning_rate": 8.443015075376884e-06, | |
| "loss": 0.0475, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 10.75, | |
| "eval_loss": 0.1362370103597641, | |
| "eval_runtime": 536.1147, | |
| "eval_samples_per_second": 2.585, | |
| "eval_steps_per_second": 2.585, | |
| "eval_wer": 27.441285537700864, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 10.76, | |
| "grad_norm": 3.188688039779663, | |
| "learning_rate": 8.440502512562815e-06, | |
| "loss": 0.0502, | |
| "step": 16025 | |
| }, | |
| { | |
| "epoch": 10.78, | |
| "grad_norm": 2.4469282627105713, | |
| "learning_rate": 8.437989949748744e-06, | |
| "loss": 0.0459, | |
| "step": 16050 | |
| }, | |
| { | |
| "epoch": 10.8, | |
| "grad_norm": 2.948697328567505, | |
| "learning_rate": 8.435477386934674e-06, | |
| "loss": 0.0472, | |
| "step": 16075 | |
| }, | |
| { | |
| "epoch": 10.81, | |
| "grad_norm": 3.236891508102417, | |
| "learning_rate": 8.432964824120605e-06, | |
| "loss": 0.0494, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 10.83, | |
| "grad_norm": 3.0507919788360596, | |
| "learning_rate": 8.430452261306534e-06, | |
| "loss": 0.0494, | |
| "step": 16125 | |
| }, | |
| { | |
| "epoch": 10.85, | |
| "grad_norm": 2.8577802181243896, | |
| "learning_rate": 8.427939698492463e-06, | |
| "loss": 0.0487, | |
| "step": 16150 | |
| }, | |
| { | |
| "epoch": 10.86, | |
| "grad_norm": 3.035109758377075, | |
| "learning_rate": 8.425427135678393e-06, | |
| "loss": 0.0486, | |
| "step": 16175 | |
| }, | |
| { | |
| "epoch": 10.88, | |
| "grad_norm": 3.5497820377349854, | |
| "learning_rate": 8.422914572864322e-06, | |
| "loss": 0.0497, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 10.9, | |
| "grad_norm": 2.838867664337158, | |
| "learning_rate": 8.420402010050251e-06, | |
| "loss": 0.0451, | |
| "step": 16225 | |
| }, | |
| { | |
| "epoch": 10.91, | |
| "grad_norm": 3.316819190979004, | |
| "learning_rate": 8.417889447236182e-06, | |
| "loss": 0.0489, | |
| "step": 16250 | |
| }, | |
| { | |
| "epoch": 10.93, | |
| "grad_norm": 3.3198862075805664, | |
| "learning_rate": 8.415376884422112e-06, | |
| "loss": 0.0528, | |
| "step": 16275 | |
| }, | |
| { | |
| "epoch": 10.95, | |
| "grad_norm": 3.4924492835998535, | |
| "learning_rate": 8.412864321608041e-06, | |
| "loss": 0.0492, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 10.96, | |
| "grad_norm": 3.0983831882476807, | |
| "learning_rate": 8.41035175879397e-06, | |
| "loss": 0.0498, | |
| "step": 16325 | |
| }, | |
| { | |
| "epoch": 10.98, | |
| "grad_norm": 3.4345991611480713, | |
| "learning_rate": 8.4078391959799e-06, | |
| "loss": 0.0483, | |
| "step": 16350 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 3.294377326965332, | |
| "learning_rate": 8.40532663316583e-06, | |
| "loss": 0.0485, | |
| "step": 16375 | |
| }, | |
| { | |
| "epoch": 11.01, | |
| "grad_norm": 2.1766245365142822, | |
| "learning_rate": 8.40281407035176e-06, | |
| "loss": 0.0371, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 11.03, | |
| "grad_norm": 2.683638334274292, | |
| "learning_rate": 8.40030150753769e-06, | |
| "loss": 0.0355, | |
| "step": 16425 | |
| }, | |
| { | |
| "epoch": 11.05, | |
| "grad_norm": 2.8458847999572754, | |
| "learning_rate": 8.397788944723619e-06, | |
| "loss": 0.038, | |
| "step": 16450 | |
| }, | |
| { | |
| "epoch": 11.06, | |
| "grad_norm": 2.7042036056518555, | |
| "learning_rate": 8.395276381909548e-06, | |
| "loss": 0.0375, | |
| "step": 16475 | |
| }, | |
| { | |
| "epoch": 11.08, | |
| "grad_norm": 2.0865659713745117, | |
| "learning_rate": 8.392763819095479e-06, | |
| "loss": 0.039, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 11.1, | |
| "grad_norm": 2.3241260051727295, | |
| "learning_rate": 8.390251256281408e-06, | |
| "loss": 0.0365, | |
| "step": 16525 | |
| }, | |
| { | |
| "epoch": 11.11, | |
| "grad_norm": 2.7509355545043945, | |
| "learning_rate": 8.387738693467338e-06, | |
| "loss": 0.0392, | |
| "step": 16550 | |
| }, | |
| { | |
| "epoch": 11.13, | |
| "grad_norm": 2.3158955574035645, | |
| "learning_rate": 8.385226130653267e-06, | |
| "loss": 0.0399, | |
| "step": 16575 | |
| }, | |
| { | |
| "epoch": 11.15, | |
| "grad_norm": 2.368791103363037, | |
| "learning_rate": 8.382713567839196e-06, | |
| "loss": 0.0366, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 11.17, | |
| "grad_norm": 3.157816171646118, | |
| "learning_rate": 8.380201005025126e-06, | |
| "loss": 0.0386, | |
| "step": 16625 | |
| }, | |
| { | |
| "epoch": 11.18, | |
| "grad_norm": 2.391731023788452, | |
| "learning_rate": 8.377688442211057e-06, | |
| "loss": 0.0409, | |
| "step": 16650 | |
| }, | |
| { | |
| "epoch": 11.2, | |
| "grad_norm": 2.881032943725586, | |
| "learning_rate": 8.375175879396986e-06, | |
| "loss": 0.0399, | |
| "step": 16675 | |
| }, | |
| { | |
| "epoch": 11.22, | |
| "grad_norm": 2.8162527084350586, | |
| "learning_rate": 8.372663316582915e-06, | |
| "loss": 0.0386, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 11.23, | |
| "grad_norm": 2.798832654953003, | |
| "learning_rate": 8.370150753768845e-06, | |
| "loss": 0.0389, | |
| "step": 16725 | |
| }, | |
| { | |
| "epoch": 11.25, | |
| "grad_norm": 2.4073362350463867, | |
| "learning_rate": 8.367638190954774e-06, | |
| "loss": 0.038, | |
| "step": 16750 | |
| }, | |
| { | |
| "epoch": 11.27, | |
| "grad_norm": 3.539222002029419, | |
| "learning_rate": 8.365125628140705e-06, | |
| "loss": 0.0385, | |
| "step": 16775 | |
| }, | |
| { | |
| "epoch": 11.28, | |
| "grad_norm": 3.047471761703491, | |
| "learning_rate": 8.362613065326634e-06, | |
| "loss": 0.0386, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 11.3, | |
| "grad_norm": 2.62675142288208, | |
| "learning_rate": 8.360100502512563e-06, | |
| "loss": 0.0388, | |
| "step": 16825 | |
| }, | |
| { | |
| "epoch": 11.32, | |
| "grad_norm": 2.6403391361236572, | |
| "learning_rate": 8.357587939698493e-06, | |
| "loss": 0.041, | |
| "step": 16850 | |
| }, | |
| { | |
| "epoch": 11.33, | |
| "grad_norm": 2.7048850059509277, | |
| "learning_rate": 8.355075376884422e-06, | |
| "loss": 0.0405, | |
| "step": 16875 | |
| }, | |
| { | |
| "epoch": 11.35, | |
| "grad_norm": 2.8291220664978027, | |
| "learning_rate": 8.352562814070353e-06, | |
| "loss": 0.0375, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 11.37, | |
| "grad_norm": 2.9671170711517334, | |
| "learning_rate": 8.350050251256282e-06, | |
| "loss": 0.0377, | |
| "step": 16925 | |
| }, | |
| { | |
| "epoch": 11.38, | |
| "grad_norm": 3.0989413261413574, | |
| "learning_rate": 8.347537688442212e-06, | |
| "loss": 0.039, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 11.4, | |
| "grad_norm": 2.738807201385498, | |
| "learning_rate": 8.345025125628141e-06, | |
| "loss": 0.0399, | |
| "step": 16975 | |
| }, | |
| { | |
| "epoch": 11.42, | |
| "grad_norm": 2.9761691093444824, | |
| "learning_rate": 8.34251256281407e-06, | |
| "loss": 0.0402, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 11.42, | |
| "eval_loss": 0.13800786435604095, | |
| "eval_runtime": 531.8418, | |
| "eval_samples_per_second": 2.606, | |
| "eval_steps_per_second": 2.606, | |
| "eval_wer": 27.76796750838778, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 11.43, | |
| "grad_norm": 3.1192235946655273, | |
| "learning_rate": 8.34e-06, | |
| "loss": 0.0389, | |
| "step": 17025 | |
| }, | |
| { | |
| "epoch": 11.45, | |
| "grad_norm": 3.019216299057007, | |
| "learning_rate": 8.33748743718593e-06, | |
| "loss": 0.0413, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 11.47, | |
| "grad_norm": 2.6235885620117188, | |
| "learning_rate": 8.33497487437186e-06, | |
| "loss": 0.043, | |
| "step": 17075 | |
| }, | |
| { | |
| "epoch": 11.48, | |
| "grad_norm": 3.3072292804718018, | |
| "learning_rate": 8.33246231155779e-06, | |
| "loss": 0.0384, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 11.5, | |
| "grad_norm": 3.032578706741333, | |
| "learning_rate": 8.32994974874372e-06, | |
| "loss": 0.0394, | |
| "step": 17125 | |
| }, | |
| { | |
| "epoch": 11.52, | |
| "grad_norm": 3.0692577362060547, | |
| "learning_rate": 8.327437185929648e-06, | |
| "loss": 0.0402, | |
| "step": 17150 | |
| }, | |
| { | |
| "epoch": 11.53, | |
| "grad_norm": 3.113739252090454, | |
| "learning_rate": 8.324924623115579e-06, | |
| "loss": 0.038, | |
| "step": 17175 | |
| }, | |
| { | |
| "epoch": 11.55, | |
| "grad_norm": 3.1510965824127197, | |
| "learning_rate": 8.322412060301508e-06, | |
| "loss": 0.0423, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 11.57, | |
| "grad_norm": 3.110407590866089, | |
| "learning_rate": 8.319899497487438e-06, | |
| "loss": 0.0381, | |
| "step": 17225 | |
| }, | |
| { | |
| "epoch": 11.58, | |
| "grad_norm": 2.9603676795959473, | |
| "learning_rate": 8.317386934673367e-06, | |
| "loss": 0.0421, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 11.6, | |
| "grad_norm": 2.7330162525177, | |
| "learning_rate": 8.314874371859298e-06, | |
| "loss": 0.04, | |
| "step": 17275 | |
| }, | |
| { | |
| "epoch": 11.62, | |
| "grad_norm": 3.783348798751831, | |
| "learning_rate": 8.312361809045226e-06, | |
| "loss": 0.0428, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 11.64, | |
| "grad_norm": 3.3141326904296875, | |
| "learning_rate": 8.309849246231157e-06, | |
| "loss": 0.04, | |
| "step": 17325 | |
| }, | |
| { | |
| "epoch": 11.65, | |
| "grad_norm": 3.1341404914855957, | |
| "learning_rate": 8.307336683417086e-06, | |
| "loss": 0.0389, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 11.67, | |
| "grad_norm": 2.5702879428863525, | |
| "learning_rate": 8.304824120603015e-06, | |
| "loss": 0.0411, | |
| "step": 17375 | |
| }, | |
| { | |
| "epoch": 11.69, | |
| "grad_norm": 2.7597875595092773, | |
| "learning_rate": 8.302311557788946e-06, | |
| "loss": 0.0387, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 11.7, | |
| "grad_norm": 3.1602911949157715, | |
| "learning_rate": 8.299798994974874e-06, | |
| "loss": 0.0401, | |
| "step": 17425 | |
| }, | |
| { | |
| "epoch": 11.72, | |
| "grad_norm": 2.9719858169555664, | |
| "learning_rate": 8.297286432160805e-06, | |
| "loss": 0.04, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 11.74, | |
| "grad_norm": 2.7361767292022705, | |
| "learning_rate": 8.294773869346734e-06, | |
| "loss": 0.041, | |
| "step": 17475 | |
| }, | |
| { | |
| "epoch": 11.75, | |
| "grad_norm": 2.7034785747528076, | |
| "learning_rate": 8.292261306532664e-06, | |
| "loss": 0.0413, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 11.77, | |
| "grad_norm": 3.2431066036224365, | |
| "learning_rate": 8.289748743718595e-06, | |
| "loss": 0.0396, | |
| "step": 17525 | |
| }, | |
| { | |
| "epoch": 11.79, | |
| "grad_norm": 2.7960753440856934, | |
| "learning_rate": 8.287236180904524e-06, | |
| "loss": 0.0406, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 11.8, | |
| "grad_norm": 3.0115575790405273, | |
| "learning_rate": 8.284723618090453e-06, | |
| "loss": 0.0395, | |
| "step": 17575 | |
| }, | |
| { | |
| "epoch": 11.82, | |
| "grad_norm": 2.4014508724212646, | |
| "learning_rate": 8.282211055276383e-06, | |
| "loss": 0.0404, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 11.84, | |
| "grad_norm": 3.1004748344421387, | |
| "learning_rate": 8.279698492462312e-06, | |
| "loss": 0.0385, | |
| "step": 17625 | |
| }, | |
| { | |
| "epoch": 11.85, | |
| "grad_norm": 2.5941948890686035, | |
| "learning_rate": 8.277185929648241e-06, | |
| "loss": 0.0398, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 11.87, | |
| "grad_norm": 2.6056137084960938, | |
| "learning_rate": 8.274673366834172e-06, | |
| "loss": 0.0381, | |
| "step": 17675 | |
| }, | |
| { | |
| "epoch": 11.89, | |
| "grad_norm": 2.8399932384490967, | |
| "learning_rate": 8.2721608040201e-06, | |
| "loss": 0.0401, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 11.9, | |
| "grad_norm": 2.9396562576293945, | |
| "learning_rate": 8.269648241206031e-06, | |
| "loss": 0.0409, | |
| "step": 17725 | |
| }, | |
| { | |
| "epoch": 11.92, | |
| "grad_norm": 3.1237053871154785, | |
| "learning_rate": 8.26713567839196e-06, | |
| "loss": 0.039, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 11.94, | |
| "grad_norm": 3.0028700828552246, | |
| "learning_rate": 8.26462311557789e-06, | |
| "loss": 0.0421, | |
| "step": 17775 | |
| }, | |
| { | |
| "epoch": 11.95, | |
| "grad_norm": 3.055807590484619, | |
| "learning_rate": 8.26211055276382e-06, | |
| "loss": 0.0405, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 11.97, | |
| "grad_norm": 3.251986026763916, | |
| "learning_rate": 8.25959798994975e-06, | |
| "loss": 0.0433, | |
| "step": 17825 | |
| }, | |
| { | |
| "epoch": 11.99, | |
| "grad_norm": 2.845550537109375, | |
| "learning_rate": 8.25708542713568e-06, | |
| "loss": 0.0385, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 2.913346290588379, | |
| "learning_rate": 8.254572864321609e-06, | |
| "loss": 0.0378, | |
| "step": 17875 | |
| }, | |
| { | |
| "epoch": 12.02, | |
| "grad_norm": 2.3991270065307617, | |
| "learning_rate": 8.252060301507538e-06, | |
| "loss": 0.0294, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 12.04, | |
| "grad_norm": 2.4414055347442627, | |
| "learning_rate": 8.249547738693467e-06, | |
| "loss": 0.0306, | |
| "step": 17925 | |
| }, | |
| { | |
| "epoch": 12.06, | |
| "grad_norm": 2.274725914001465, | |
| "learning_rate": 8.247035175879398e-06, | |
| "loss": 0.0295, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 12.07, | |
| "grad_norm": 2.767655849456787, | |
| "learning_rate": 8.244522613065328e-06, | |
| "loss": 0.0307, | |
| "step": 17975 | |
| }, | |
| { | |
| "epoch": 12.09, | |
| "grad_norm": 2.5598373413085938, | |
| "learning_rate": 8.242010050251257e-06, | |
| "loss": 0.0307, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 12.09, | |
| "eval_loss": 0.1446864753961563, | |
| "eval_runtime": 537.4834, | |
| "eval_samples_per_second": 2.579, | |
| "eval_steps_per_second": 2.579, | |
| "eval_wer": 27.238212961327918, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 12.11, | |
| "grad_norm": 2.654730796813965, | |
| "learning_rate": 8.239497487437186e-06, | |
| "loss": 0.0303, | |
| "step": 18025 | |
| }, | |
| { | |
| "epoch": 12.12, | |
| "grad_norm": 2.6578266620635986, | |
| "learning_rate": 8.236984924623116e-06, | |
| "loss": 0.0298, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 12.14, | |
| "grad_norm": 3.2597641944885254, | |
| "learning_rate": 8.234472361809047e-06, | |
| "loss": 0.0307, | |
| "step": 18075 | |
| }, | |
| { | |
| "epoch": 12.16, | |
| "grad_norm": 3.1756911277770996, | |
| "learning_rate": 8.231959798994976e-06, | |
| "loss": 0.0303, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 12.17, | |
| "grad_norm": 2.3517801761627197, | |
| "learning_rate": 8.229447236180905e-06, | |
| "loss": 0.0299, | |
| "step": 18125 | |
| }, | |
| { | |
| "epoch": 12.19, | |
| "grad_norm": 2.7081449031829834, | |
| "learning_rate": 8.226934673366835e-06, | |
| "loss": 0.0317, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 12.21, | |
| "grad_norm": 2.9442265033721924, | |
| "learning_rate": 8.224422110552764e-06, | |
| "loss": 0.0309, | |
| "step": 18175 | |
| }, | |
| { | |
| "epoch": 12.22, | |
| "grad_norm": 2.202742099761963, | |
| "learning_rate": 8.221909547738695e-06, | |
| "loss": 0.0299, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 12.24, | |
| "grad_norm": 2.683105230331421, | |
| "learning_rate": 8.219396984924624e-06, | |
| "loss": 0.0303, | |
| "step": 18225 | |
| }, | |
| { | |
| "epoch": 12.26, | |
| "grad_norm": 2.4034810066223145, | |
| "learning_rate": 8.216884422110554e-06, | |
| "loss": 0.0319, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 12.27, | |
| "grad_norm": 2.621290683746338, | |
| "learning_rate": 8.214371859296483e-06, | |
| "loss": 0.0318, | |
| "step": 18275 | |
| }, | |
| { | |
| "epoch": 12.29, | |
| "grad_norm": 2.842874765396118, | |
| "learning_rate": 8.211859296482412e-06, | |
| "loss": 0.0332, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 12.31, | |
| "grad_norm": 2.4797563552856445, | |
| "learning_rate": 8.209346733668342e-06, | |
| "loss": 0.0325, | |
| "step": 18325 | |
| }, | |
| { | |
| "epoch": 12.32, | |
| "grad_norm": 2.8069446086883545, | |
| "learning_rate": 8.206834170854273e-06, | |
| "loss": 0.033, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 12.34, | |
| "grad_norm": 2.9851083755493164, | |
| "learning_rate": 8.204321608040202e-06, | |
| "loss": 0.0321, | |
| "step": 18375 | |
| }, | |
| { | |
| "epoch": 12.36, | |
| "grad_norm": 2.948084592819214, | |
| "learning_rate": 8.201809045226131e-06, | |
| "loss": 0.0338, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 12.37, | |
| "grad_norm": 2.7898919582366943, | |
| "learning_rate": 8.19929648241206e-06, | |
| "loss": 0.0315, | |
| "step": 18425 | |
| }, | |
| { | |
| "epoch": 12.39, | |
| "grad_norm": 2.366434097290039, | |
| "learning_rate": 8.19678391959799e-06, | |
| "loss": 0.032, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 12.41, | |
| "grad_norm": 2.9562463760375977, | |
| "learning_rate": 8.194271356783921e-06, | |
| "loss": 0.0334, | |
| "step": 18475 | |
| }, | |
| { | |
| "epoch": 12.42, | |
| "grad_norm": 2.5975656509399414, | |
| "learning_rate": 8.19175879396985e-06, | |
| "loss": 0.0331, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 12.44, | |
| "grad_norm": 2.8374183177948, | |
| "learning_rate": 8.18924623115578e-06, | |
| "loss": 0.0318, | |
| "step": 18525 | |
| }, | |
| { | |
| "epoch": 12.46, | |
| "grad_norm": 2.839860439300537, | |
| "learning_rate": 8.186733668341709e-06, | |
| "loss": 0.0324, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 12.47, | |
| "grad_norm": 2.800180196762085, | |
| "learning_rate": 8.184221105527638e-06, | |
| "loss": 0.0309, | |
| "step": 18575 | |
| }, | |
| { | |
| "epoch": 12.49, | |
| "grad_norm": 2.644583225250244, | |
| "learning_rate": 8.18170854271357e-06, | |
| "loss": 0.0331, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 12.51, | |
| "grad_norm": 3.0358402729034424, | |
| "learning_rate": 8.179195979899498e-06, | |
| "loss": 0.0327, | |
| "step": 18625 | |
| }, | |
| { | |
| "epoch": 12.53, | |
| "grad_norm": 2.807608127593994, | |
| "learning_rate": 8.176683417085428e-06, | |
| "loss": 0.032, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 12.54, | |
| "grad_norm": 3.115736961364746, | |
| "learning_rate": 8.174170854271357e-06, | |
| "loss": 0.034, | |
| "step": 18675 | |
| }, | |
| { | |
| "epoch": 12.56, | |
| "grad_norm": 2.563960313796997, | |
| "learning_rate": 8.171658291457286e-06, | |
| "loss": 0.0325, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 12.58, | |
| "grad_norm": 2.6218457221984863, | |
| "learning_rate": 8.169145728643216e-06, | |
| "loss": 0.0312, | |
| "step": 18725 | |
| }, | |
| { | |
| "epoch": 12.59, | |
| "grad_norm": 2.6230452060699463, | |
| "learning_rate": 8.166633165829147e-06, | |
| "loss": 0.0318, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 12.61, | |
| "grad_norm": 3.0028395652770996, | |
| "learning_rate": 8.164120603015076e-06, | |
| "loss": 0.0339, | |
| "step": 18775 | |
| }, | |
| { | |
| "epoch": 12.63, | |
| "grad_norm": 2.810173273086548, | |
| "learning_rate": 8.161608040201005e-06, | |
| "loss": 0.0337, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 12.64, | |
| "grad_norm": 2.7154364585876465, | |
| "learning_rate": 8.159095477386936e-06, | |
| "loss": 0.0315, | |
| "step": 18825 | |
| }, | |
| { | |
| "epoch": 12.66, | |
| "grad_norm": 2.9645156860351562, | |
| "learning_rate": 8.156582914572864e-06, | |
| "loss": 0.0341, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 12.68, | |
| "grad_norm": 2.558562755584717, | |
| "learning_rate": 8.154070351758795e-06, | |
| "loss": 0.0321, | |
| "step": 18875 | |
| }, | |
| { | |
| "epoch": 12.69, | |
| "grad_norm": 3.045975923538208, | |
| "learning_rate": 8.151557788944724e-06, | |
| "loss": 0.0328, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 12.71, | |
| "grad_norm": 2.605736494064331, | |
| "learning_rate": 8.149045226130654e-06, | |
| "loss": 0.0338, | |
| "step": 18925 | |
| }, | |
| { | |
| "epoch": 12.73, | |
| "grad_norm": 2.6503992080688477, | |
| "learning_rate": 8.146532663316583e-06, | |
| "loss": 0.0349, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 12.74, | |
| "grad_norm": 2.7485363483428955, | |
| "learning_rate": 8.144020100502512e-06, | |
| "loss": 0.0331, | |
| "step": 18975 | |
| }, | |
| { | |
| "epoch": 12.76, | |
| "grad_norm": 3.0558133125305176, | |
| "learning_rate": 8.141507537688443e-06, | |
| "loss": 0.0331, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 12.76, | |
| "eval_loss": 0.15126191079616547, | |
| "eval_runtime": 542.0176, | |
| "eval_samples_per_second": 2.557, | |
| "eval_steps_per_second": 2.557, | |
| "eval_wer": 28.129966448878683, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 12.78, | |
| "grad_norm": 3.117704391479492, | |
| "learning_rate": 8.138994974874373e-06, | |
| "loss": 0.0336, | |
| "step": 19025 | |
| }, | |
| { | |
| "epoch": 12.79, | |
| "grad_norm": 2.7645487785339355, | |
| "learning_rate": 8.136482412060302e-06, | |
| "loss": 0.0324, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 12.81, | |
| "grad_norm": 2.742771625518799, | |
| "learning_rate": 8.133969849246231e-06, | |
| "loss": 0.0331, | |
| "step": 19075 | |
| }, | |
| { | |
| "epoch": 12.83, | |
| "grad_norm": 2.8407609462738037, | |
| "learning_rate": 8.131457286432162e-06, | |
| "loss": 0.0317, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 12.84, | |
| "grad_norm": 2.5845396518707275, | |
| "learning_rate": 8.12894472361809e-06, | |
| "loss": 0.0335, | |
| "step": 19125 | |
| }, | |
| { | |
| "epoch": 12.86, | |
| "grad_norm": 2.8739688396453857, | |
| "learning_rate": 8.126432160804021e-06, | |
| "loss": 0.0333, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 12.88, | |
| "grad_norm": 3.1160261631011963, | |
| "learning_rate": 8.12391959798995e-06, | |
| "loss": 0.033, | |
| "step": 19175 | |
| }, | |
| { | |
| "epoch": 12.89, | |
| "grad_norm": 2.978895902633667, | |
| "learning_rate": 8.12140703517588e-06, | |
| "loss": 0.0358, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 12.91, | |
| "grad_norm": 3.0800576210021973, | |
| "learning_rate": 8.11889447236181e-06, | |
| "loss": 0.0335, | |
| "step": 19225 | |
| }, | |
| { | |
| "epoch": 12.93, | |
| "grad_norm": 2.4890170097351074, | |
| "learning_rate": 8.11638190954774e-06, | |
| "loss": 0.034, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 12.94, | |
| "grad_norm": 2.8995964527130127, | |
| "learning_rate": 8.11386934673367e-06, | |
| "loss": 0.0342, | |
| "step": 19275 | |
| }, | |
| { | |
| "epoch": 12.96, | |
| "grad_norm": 2.8822238445281982, | |
| "learning_rate": 8.111356783919599e-06, | |
| "loss": 0.0338, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 12.98, | |
| "grad_norm": 2.3847439289093018, | |
| "learning_rate": 8.108844221105528e-06, | |
| "loss": 0.0345, | |
| "step": 19325 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 2.5077168941497803, | |
| "learning_rate": 8.106331658291457e-06, | |
| "loss": 0.0323, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 13.01, | |
| "grad_norm": 2.0860869884490967, | |
| "learning_rate": 8.103819095477388e-06, | |
| "loss": 0.0256, | |
| "step": 19375 | |
| }, | |
| { | |
| "epoch": 13.03, | |
| "grad_norm": 2.4186856746673584, | |
| "learning_rate": 8.101306532663318e-06, | |
| "loss": 0.025, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 13.05, | |
| "grad_norm": 2.169545888900757, | |
| "learning_rate": 8.098793969849247e-06, | |
| "loss": 0.024, | |
| "step": 19425 | |
| }, | |
| { | |
| "epoch": 13.06, | |
| "grad_norm": 2.250295877456665, | |
| "learning_rate": 8.096281407035176e-06, | |
| "loss": 0.0227, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 13.08, | |
| "grad_norm": 2.8207223415374756, | |
| "learning_rate": 8.093768844221106e-06, | |
| "loss": 0.0254, | |
| "step": 19475 | |
| }, | |
| { | |
| "epoch": 13.1, | |
| "grad_norm": 2.4845900535583496, | |
| "learning_rate": 8.091256281407037e-06, | |
| "loss": 0.0251, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 13.11, | |
| "grad_norm": 2.9678895473480225, | |
| "learning_rate": 8.088743718592966e-06, | |
| "loss": 0.0255, | |
| "step": 19525 | |
| }, | |
| { | |
| "epoch": 13.13, | |
| "grad_norm": 3.0639657974243164, | |
| "learning_rate": 8.086231155778895e-06, | |
| "loss": 0.0266, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 13.15, | |
| "grad_norm": 2.5778753757476807, | |
| "learning_rate": 8.083718592964825e-06, | |
| "loss": 0.0258, | |
| "step": 19575 | |
| }, | |
| { | |
| "epoch": 13.16, | |
| "grad_norm": 2.3090131282806396, | |
| "learning_rate": 8.081206030150754e-06, | |
| "loss": 0.0234, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 13.18, | |
| "grad_norm": 2.645989418029785, | |
| "learning_rate": 8.078693467336685e-06, | |
| "loss": 0.0243, | |
| "step": 19625 | |
| }, | |
| { | |
| "epoch": 13.2, | |
| "grad_norm": 2.4817280769348145, | |
| "learning_rate": 8.076180904522614e-06, | |
| "loss": 0.0274, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 13.21, | |
| "grad_norm": 2.17031192779541, | |
| "learning_rate": 8.073668341708544e-06, | |
| "loss": 0.024, | |
| "step": 19675 | |
| }, | |
| { | |
| "epoch": 13.23, | |
| "grad_norm": 2.587280035018921, | |
| "learning_rate": 8.071155778894473e-06, | |
| "loss": 0.0258, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 13.25, | |
| "grad_norm": 2.3844306468963623, | |
| "learning_rate": 8.068643216080402e-06, | |
| "loss": 0.0264, | |
| "step": 19725 | |
| }, | |
| { | |
| "epoch": 13.26, | |
| "grad_norm": 2.440300226211548, | |
| "learning_rate": 8.066130653266332e-06, | |
| "loss": 0.0259, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 13.28, | |
| "grad_norm": 2.120274543762207, | |
| "learning_rate": 8.063618090452263e-06, | |
| "loss": 0.0253, | |
| "step": 19775 | |
| }, | |
| { | |
| "epoch": 13.3, | |
| "grad_norm": 2.412203073501587, | |
| "learning_rate": 8.061105527638192e-06, | |
| "loss": 0.0256, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 13.31, | |
| "grad_norm": 2.3215441703796387, | |
| "learning_rate": 8.058592964824121e-06, | |
| "loss": 0.0247, | |
| "step": 19825 | |
| }, | |
| { | |
| "epoch": 13.33, | |
| "grad_norm": 2.0729939937591553, | |
| "learning_rate": 8.05608040201005e-06, | |
| "loss": 0.0248, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 13.35, | |
| "grad_norm": 2.622880697250366, | |
| "learning_rate": 8.05356783919598e-06, | |
| "loss": 0.0271, | |
| "step": 19875 | |
| }, | |
| { | |
| "epoch": 13.36, | |
| "grad_norm": 2.5304481983184814, | |
| "learning_rate": 8.051055276381911e-06, | |
| "loss": 0.0255, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 13.38, | |
| "grad_norm": 2.6204922199249268, | |
| "learning_rate": 8.04854271356784e-06, | |
| "loss": 0.0261, | |
| "step": 19925 | |
| }, | |
| { | |
| "epoch": 13.4, | |
| "grad_norm": 2.284783363342285, | |
| "learning_rate": 8.04603015075377e-06, | |
| "loss": 0.0257, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 13.42, | |
| "grad_norm": 3.0914671421051025, | |
| "learning_rate": 8.043517587939699e-06, | |
| "loss": 0.027, | |
| "step": 19975 | |
| }, | |
| { | |
| "epoch": 13.43, | |
| "grad_norm": 2.8612654209136963, | |
| "learning_rate": 8.041005025125628e-06, | |
| "loss": 0.0258, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 13.43, | |
| "eval_loss": 0.15857619047164917, | |
| "eval_runtime": 534.77, | |
| "eval_samples_per_second": 2.592, | |
| "eval_steps_per_second": 2.592, | |
| "eval_wer": 28.809818117605506, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 13.45, | |
| "grad_norm": 3.074786424636841, | |
| "learning_rate": 8.03849246231156e-06, | |
| "loss": 0.026, | |
| "step": 20025 | |
| }, | |
| { | |
| "epoch": 13.47, | |
| "grad_norm": 2.40915584564209, | |
| "learning_rate": 8.035979899497489e-06, | |
| "loss": 0.029, | |
| "step": 20050 | |
| }, | |
| { | |
| "epoch": 13.48, | |
| "grad_norm": 2.7619211673736572, | |
| "learning_rate": 8.033467336683418e-06, | |
| "loss": 0.0261, | |
| "step": 20075 | |
| }, | |
| { | |
| "epoch": 13.5, | |
| "grad_norm": 2.8454036712646484, | |
| "learning_rate": 8.030954773869347e-06, | |
| "loss": 0.0257, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 13.52, | |
| "grad_norm": 2.519239664077759, | |
| "learning_rate": 8.028442211055277e-06, | |
| "loss": 0.0255, | |
| "step": 20125 | |
| }, | |
| { | |
| "epoch": 13.53, | |
| "grad_norm": 2.798295736312866, | |
| "learning_rate": 8.025929648241206e-06, | |
| "loss": 0.0256, | |
| "step": 20150 | |
| }, | |
| { | |
| "epoch": 13.55, | |
| "grad_norm": 2.658249855041504, | |
| "learning_rate": 8.023417085427137e-06, | |
| "loss": 0.0252, | |
| "step": 20175 | |
| }, | |
| { | |
| "epoch": 13.57, | |
| "grad_norm": 2.55195689201355, | |
| "learning_rate": 8.020904522613066e-06, | |
| "loss": 0.0281, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 13.58, | |
| "grad_norm": 2.282550096511841, | |
| "learning_rate": 8.018391959798996e-06, | |
| "loss": 0.0262, | |
| "step": 20225 | |
| }, | |
| { | |
| "epoch": 13.6, | |
| "grad_norm": 2.6260697841644287, | |
| "learning_rate": 8.015879396984927e-06, | |
| "loss": 0.0249, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 13.62, | |
| "grad_norm": 2.61671781539917, | |
| "learning_rate": 8.013366834170854e-06, | |
| "loss": 0.0276, | |
| "step": 20275 | |
| }, | |
| { | |
| "epoch": 13.63, | |
| "grad_norm": 2.5859358310699463, | |
| "learning_rate": 8.010854271356785e-06, | |
| "loss": 0.0265, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 13.65, | |
| "grad_norm": 2.6100573539733887, | |
| "learning_rate": 8.008341708542714e-06, | |
| "loss": 0.0258, | |
| "step": 20325 | |
| }, | |
| { | |
| "epoch": 13.67, | |
| "grad_norm": 2.5182266235351562, | |
| "learning_rate": 8.005829145728644e-06, | |
| "loss": 0.028, | |
| "step": 20350 | |
| }, | |
| { | |
| "epoch": 13.68, | |
| "grad_norm": 3.105220317840576, | |
| "learning_rate": 8.003316582914573e-06, | |
| "loss": 0.027, | |
| "step": 20375 | |
| }, | |
| { | |
| "epoch": 13.7, | |
| "grad_norm": 2.7697339057922363, | |
| "learning_rate": 8.000804020100502e-06, | |
| "loss": 0.0274, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 13.72, | |
| "grad_norm": 2.74824857711792, | |
| "learning_rate": 7.998291457286432e-06, | |
| "loss": 0.0264, | |
| "step": 20425 | |
| }, | |
| { | |
| "epoch": 13.73, | |
| "grad_norm": 2.1460442543029785, | |
| "learning_rate": 7.995778894472363e-06, | |
| "loss": 0.0266, | |
| "step": 20450 | |
| }, | |
| { | |
| "epoch": 13.75, | |
| "grad_norm": 2.700098991394043, | |
| "learning_rate": 7.993266331658292e-06, | |
| "loss": 0.0271, | |
| "step": 20475 | |
| }, | |
| { | |
| "epoch": 13.77, | |
| "grad_norm": 3.0646328926086426, | |
| "learning_rate": 7.990753768844221e-06, | |
| "loss": 0.0273, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 13.78, | |
| "grad_norm": 2.4817585945129395, | |
| "learning_rate": 7.988241206030152e-06, | |
| "loss": 0.0267, | |
| "step": 20525 | |
| }, | |
| { | |
| "epoch": 13.8, | |
| "grad_norm": 2.383892059326172, | |
| "learning_rate": 7.98572864321608e-06, | |
| "loss": 0.0281, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 13.82, | |
| "grad_norm": 2.6712028980255127, | |
| "learning_rate": 7.983216080402011e-06, | |
| "loss": 0.0262, | |
| "step": 20575 | |
| }, | |
| { | |
| "epoch": 13.83, | |
| "grad_norm": 2.8054888248443604, | |
| "learning_rate": 7.98070351758794e-06, | |
| "loss": 0.0277, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 13.85, | |
| "grad_norm": 2.520451545715332, | |
| "learning_rate": 7.97819095477387e-06, | |
| "loss": 0.0256, | |
| "step": 20625 | |
| }, | |
| { | |
| "epoch": 13.87, | |
| "grad_norm": 2.6715471744537354, | |
| "learning_rate": 7.975678391959799e-06, | |
| "loss": 0.0271, | |
| "step": 20650 | |
| }, | |
| { | |
| "epoch": 13.89, | |
| "grad_norm": 2.936898946762085, | |
| "learning_rate": 7.973165829145728e-06, | |
| "loss": 0.0271, | |
| "step": 20675 | |
| }, | |
| { | |
| "epoch": 13.9, | |
| "grad_norm": 2.5876598358154297, | |
| "learning_rate": 7.97065326633166e-06, | |
| "loss": 0.0254, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 13.92, | |
| "grad_norm": 2.576573133468628, | |
| "learning_rate": 7.968140703517589e-06, | |
| "loss": 0.0268, | |
| "step": 20725 | |
| }, | |
| { | |
| "epoch": 13.94, | |
| "grad_norm": 2.962134838104248, | |
| "learning_rate": 7.965628140703518e-06, | |
| "loss": 0.028, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 13.95, | |
| "grad_norm": 2.4978857040405273, | |
| "learning_rate": 7.963115577889447e-06, | |
| "loss": 0.0268, | |
| "step": 20775 | |
| }, | |
| { | |
| "epoch": 13.97, | |
| "grad_norm": 2.7507359981536865, | |
| "learning_rate": 7.960603015075378e-06, | |
| "loss": 0.0264, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 13.99, | |
| "grad_norm": 2.290602922439575, | |
| "learning_rate": 7.958090452261306e-06, | |
| "loss": 0.0268, | |
| "step": 20825 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 1.895709753036499, | |
| "learning_rate": 7.955577889447237e-06, | |
| "loss": 0.0267, | |
| "step": 20850 | |
| }, | |
| { | |
| "epoch": 14.02, | |
| "grad_norm": 2.577284097671509, | |
| "learning_rate": 7.953065326633166e-06, | |
| "loss": 0.02, | |
| "step": 20875 | |
| }, | |
| { | |
| "epoch": 14.04, | |
| "grad_norm": 2.139061450958252, | |
| "learning_rate": 7.950552763819096e-06, | |
| "loss": 0.0182, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 14.05, | |
| "grad_norm": 2.31142520904541, | |
| "learning_rate": 7.948040201005027e-06, | |
| "loss": 0.0189, | |
| "step": 20925 | |
| }, | |
| { | |
| "epoch": 14.07, | |
| "grad_norm": 2.4628167152404785, | |
| "learning_rate": 7.945527638190954e-06, | |
| "loss": 0.0191, | |
| "step": 20950 | |
| }, | |
| { | |
| "epoch": 14.09, | |
| "grad_norm": 2.2550642490386963, | |
| "learning_rate": 7.943015075376885e-06, | |
| "loss": 0.0205, | |
| "step": 20975 | |
| }, | |
| { | |
| "epoch": 14.1, | |
| "grad_norm": 2.5067131519317627, | |
| "learning_rate": 7.940502512562815e-06, | |
| "loss": 0.0193, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 14.1, | |
| "eval_loss": 0.16441361606121063, | |
| "eval_runtime": 532.0683, | |
| "eval_samples_per_second": 2.605, | |
| "eval_steps_per_second": 2.605, | |
| "eval_wer": 28.28006357054565, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 14.12, | |
| "grad_norm": 2.0792436599731445, | |
| "learning_rate": 7.937989949748744e-06, | |
| "loss": 0.0202, | |
| "step": 21025 | |
| }, | |
| { | |
| "epoch": 14.14, | |
| "grad_norm": 2.0055572986602783, | |
| "learning_rate": 7.935477386934673e-06, | |
| "loss": 0.02, | |
| "step": 21050 | |
| }, | |
| { | |
| "epoch": 14.15, | |
| "grad_norm": 2.557342052459717, | |
| "learning_rate": 7.932964824120604e-06, | |
| "loss": 0.0202, | |
| "step": 21075 | |
| }, | |
| { | |
| "epoch": 14.17, | |
| "grad_norm": 2.351605176925659, | |
| "learning_rate": 7.930452261306534e-06, | |
| "loss": 0.0205, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 14.19, | |
| "grad_norm": 2.4522876739501953, | |
| "learning_rate": 7.927939698492463e-06, | |
| "loss": 0.0197, | |
| "step": 21125 | |
| }, | |
| { | |
| "epoch": 14.2, | |
| "grad_norm": 1.9259110689163208, | |
| "learning_rate": 7.925527638190955e-06, | |
| "loss": 0.019, | |
| "step": 21150 | |
| }, | |
| { | |
| "epoch": 14.22, | |
| "grad_norm": 2.6869237422943115, | |
| "learning_rate": 7.923015075376886e-06, | |
| "loss": 0.0191, | |
| "step": 21175 | |
| }, | |
| { | |
| "epoch": 14.24, | |
| "grad_norm": 2.1610636711120605, | |
| "learning_rate": 7.920502512562815e-06, | |
| "loss": 0.0199, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 14.25, | |
| "grad_norm": 2.3419833183288574, | |
| "learning_rate": 7.917989949748744e-06, | |
| "loss": 0.0205, | |
| "step": 21225 | |
| }, | |
| { | |
| "epoch": 14.27, | |
| "grad_norm": 2.655822277069092, | |
| "learning_rate": 7.915477386934674e-06, | |
| "loss": 0.0211, | |
| "step": 21250 | |
| }, | |
| { | |
| "epoch": 14.29, | |
| "grad_norm": 2.3895249366760254, | |
| "learning_rate": 7.912964824120603e-06, | |
| "loss": 0.02, | |
| "step": 21275 | |
| }, | |
| { | |
| "epoch": 14.3, | |
| "grad_norm": 2.626079559326172, | |
| "learning_rate": 7.910452261306534e-06, | |
| "loss": 0.0204, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 14.32, | |
| "grad_norm": 2.4946000576019287, | |
| "learning_rate": 7.907939698492463e-06, | |
| "loss": 0.0211, | |
| "step": 21325 | |
| }, | |
| { | |
| "epoch": 14.34, | |
| "grad_norm": 2.2254092693328857, | |
| "learning_rate": 7.905427135678393e-06, | |
| "loss": 0.0209, | |
| "step": 21350 | |
| }, | |
| { | |
| "epoch": 14.36, | |
| "grad_norm": 2.813023328781128, | |
| "learning_rate": 7.902914572864322e-06, | |
| "loss": 0.0205, | |
| "step": 21375 | |
| }, | |
| { | |
| "epoch": 14.37, | |
| "grad_norm": 2.3448939323425293, | |
| "learning_rate": 7.900402010050253e-06, | |
| "loss": 0.0204, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 14.39, | |
| "grad_norm": 2.1861133575439453, | |
| "learning_rate": 7.89788944723618e-06, | |
| "loss": 0.0211, | |
| "step": 21425 | |
| }, | |
| { | |
| "epoch": 14.41, | |
| "grad_norm": 2.1422207355499268, | |
| "learning_rate": 7.895376884422111e-06, | |
| "loss": 0.0224, | |
| "step": 21450 | |
| }, | |
| { | |
| "epoch": 14.42, | |
| "grad_norm": 2.713761329650879, | |
| "learning_rate": 7.89286432160804e-06, | |
| "loss": 0.02, | |
| "step": 21475 | |
| }, | |
| { | |
| "epoch": 14.44, | |
| "grad_norm": 2.430680274963379, | |
| "learning_rate": 7.89035175879397e-06, | |
| "loss": 0.0218, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 14.46, | |
| "grad_norm": 2.974393606185913, | |
| "learning_rate": 7.887839195979901e-06, | |
| "loss": 0.0197, | |
| "step": 21525 | |
| }, | |
| { | |
| "epoch": 14.47, | |
| "grad_norm": 2.530994415283203, | |
| "learning_rate": 7.885326633165829e-06, | |
| "loss": 0.0221, | |
| "step": 21550 | |
| }, | |
| { | |
| "epoch": 14.49, | |
| "grad_norm": 2.5071282386779785, | |
| "learning_rate": 7.88281407035176e-06, | |
| "loss": 0.0214, | |
| "step": 21575 | |
| }, | |
| { | |
| "epoch": 14.51, | |
| "grad_norm": 2.2111854553222656, | |
| "learning_rate": 7.880301507537689e-06, | |
| "loss": 0.0208, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 14.52, | |
| "grad_norm": 2.194091320037842, | |
| "learning_rate": 7.877788944723618e-06, | |
| "loss": 0.0203, | |
| "step": 21625 | |
| }, | |
| { | |
| "epoch": 14.54, | |
| "grad_norm": 2.2206263542175293, | |
| "learning_rate": 7.875276381909548e-06, | |
| "loss": 0.0221, | |
| "step": 21650 | |
| }, | |
| { | |
| "epoch": 14.56, | |
| "grad_norm": 2.425065279006958, | |
| "learning_rate": 7.872763819095479e-06, | |
| "loss": 0.0211, | |
| "step": 21675 | |
| }, | |
| { | |
| "epoch": 14.57, | |
| "grad_norm": 2.6152865886688232, | |
| "learning_rate": 7.870251256281408e-06, | |
| "loss": 0.0207, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 14.59, | |
| "grad_norm": 2.2612714767456055, | |
| "learning_rate": 7.867738693467337e-06, | |
| "loss": 0.0209, | |
| "step": 21725 | |
| }, | |
| { | |
| "epoch": 14.61, | |
| "grad_norm": 2.1470086574554443, | |
| "learning_rate": 7.865226130653267e-06, | |
| "loss": 0.021, | |
| "step": 21750 | |
| }, | |
| { | |
| "epoch": 14.62, | |
| "grad_norm": 2.484851598739624, | |
| "learning_rate": 7.862713567839196e-06, | |
| "loss": 0.02, | |
| "step": 21775 | |
| }, | |
| { | |
| "epoch": 14.64, | |
| "grad_norm": 2.4667041301727295, | |
| "learning_rate": 7.860201005025127e-06, | |
| "loss": 0.0206, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 14.66, | |
| "grad_norm": 2.9903693199157715, | |
| "learning_rate": 7.857688442211055e-06, | |
| "loss": 0.0219, | |
| "step": 21825 | |
| }, | |
| { | |
| "epoch": 14.67, | |
| "grad_norm": 2.6542530059814453, | |
| "learning_rate": 7.855175879396986e-06, | |
| "loss": 0.0213, | |
| "step": 21850 | |
| }, | |
| { | |
| "epoch": 14.69, | |
| "grad_norm": 2.333191394805908, | |
| "learning_rate": 7.852663316582915e-06, | |
| "loss": 0.0214, | |
| "step": 21875 | |
| }, | |
| { | |
| "epoch": 14.71, | |
| "grad_norm": 2.71769380569458, | |
| "learning_rate": 7.850150753768844e-06, | |
| "loss": 0.0215, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 14.72, | |
| "grad_norm": 2.4674861431121826, | |
| "learning_rate": 7.847638190954775e-06, | |
| "loss": 0.0211, | |
| "step": 21925 | |
| }, | |
| { | |
| "epoch": 14.74, | |
| "grad_norm": 2.931941270828247, | |
| "learning_rate": 7.845125628140705e-06, | |
| "loss": 0.0244, | |
| "step": 21950 | |
| }, | |
| { | |
| "epoch": 14.76, | |
| "grad_norm": 2.738786458969116, | |
| "learning_rate": 7.842613065326634e-06, | |
| "loss": 0.0218, | |
| "step": 21975 | |
| }, | |
| { | |
| "epoch": 14.78, | |
| "grad_norm": 2.375138521194458, | |
| "learning_rate": 7.840100502512563e-06, | |
| "loss": 0.0219, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 14.78, | |
| "eval_loss": 0.16828645765781403, | |
| "eval_runtime": 539.6518, | |
| "eval_samples_per_second": 2.568, | |
| "eval_steps_per_second": 2.568, | |
| "eval_wer": 28.11230796397669, | |
| "step": 22000 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 100000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 68, | |
| "save_steps": 1000, | |
| "total_flos": 3.465787561869312e+19, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |