| { | |
| "best_metric": 37.96787634887283, | |
| "best_model_checkpoint": "./whisper-tiny-ro/checkpoint-5000", | |
| "epoch": 17.73049645390071, | |
| "eval_steps": 1000, | |
| "global_step": 10000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.044326241134751775, | |
| "grad_norm": 28.326566696166992, | |
| "learning_rate": 2.2e-07, | |
| "loss": 1.8024, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.08865248226950355, | |
| "grad_norm": 22.18955421447754, | |
| "learning_rate": 4.7000000000000005e-07, | |
| "loss": 1.7794, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.13297872340425532, | |
| "grad_norm": 21.889328002929688, | |
| "learning_rate": 7.2e-07, | |
| "loss": 1.63, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.1773049645390071, | |
| "grad_norm": 19.02008819580078, | |
| "learning_rate": 9.7e-07, | |
| "loss": 1.5239, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.22163120567375885, | |
| "grad_norm": 16.531150817871094, | |
| "learning_rate": 1.2200000000000002e-06, | |
| "loss": 1.3903, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.26595744680851063, | |
| "grad_norm": 16.245573043823242, | |
| "learning_rate": 1.4700000000000001e-06, | |
| "loss": 1.2517, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3102836879432624, | |
| "grad_norm": 17.484891891479492, | |
| "learning_rate": 1.72e-06, | |
| "loss": 1.1449, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.3546099290780142, | |
| "grad_norm": 13.317365646362305, | |
| "learning_rate": 1.97e-06, | |
| "loss": 1.0231, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.39893617021276595, | |
| "grad_norm": 16.296846389770508, | |
| "learning_rate": 2.2200000000000003e-06, | |
| "loss": 1.0033, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.4432624113475177, | |
| "grad_norm": 14.858762741088867, | |
| "learning_rate": 2.47e-06, | |
| "loss": 0.9183, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.4875886524822695, | |
| "grad_norm": 15.132709503173828, | |
| "learning_rate": 2.7200000000000002e-06, | |
| "loss": 0.9142, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.5319148936170213, | |
| "grad_norm": 15.102398872375488, | |
| "learning_rate": 2.97e-06, | |
| "loss": 0.8795, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5762411347517731, | |
| "grad_norm": 15.617897033691406, | |
| "learning_rate": 3.2200000000000005e-06, | |
| "loss": 0.863, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.6205673758865248, | |
| "grad_norm": 15.325774192810059, | |
| "learning_rate": 3.4700000000000002e-06, | |
| "loss": 0.8094, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.6648936170212766, | |
| "grad_norm": 13.950435638427734, | |
| "learning_rate": 3.7200000000000004e-06, | |
| "loss": 0.8471, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.7092198581560284, | |
| "grad_norm": 17.703575134277344, | |
| "learning_rate": 3.97e-06, | |
| "loss": 0.83, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7535460992907801, | |
| "grad_norm": 13.622574806213379, | |
| "learning_rate": 4.22e-06, | |
| "loss": 0.7605, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.7978723404255319, | |
| "grad_norm": 13.574337005615234, | |
| "learning_rate": 4.47e-06, | |
| "loss": 0.7464, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.8421985815602837, | |
| "grad_norm": 12.981876373291016, | |
| "learning_rate": 4.7200000000000005e-06, | |
| "loss": 0.767, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.8865248226950354, | |
| "grad_norm": 15.00900936126709, | |
| "learning_rate": 4.970000000000001e-06, | |
| "loss": 0.7617, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.9308510638297872, | |
| "grad_norm": 16.31970977783203, | |
| "learning_rate": 4.988421052631579e-06, | |
| "loss": 0.6962, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.975177304964539, | |
| "grad_norm": 10.05798625946045, | |
| "learning_rate": 4.9752631578947375e-06, | |
| "loss": 0.6676, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.0195035460992907, | |
| "grad_norm": 12.080132484436035, | |
| "learning_rate": 4.962105263157895e-06, | |
| "loss": 0.6351, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.0638297872340425, | |
| "grad_norm": 12.87156867980957, | |
| "learning_rate": 4.948947368421053e-06, | |
| "loss": 0.6078, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.1081560283687943, | |
| "grad_norm": 11.931558609008789, | |
| "learning_rate": 4.935789473684211e-06, | |
| "loss": 0.5755, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.1524822695035462, | |
| "grad_norm": 12.827286720275879, | |
| "learning_rate": 4.922631578947369e-06, | |
| "loss": 0.5679, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.196808510638298, | |
| "grad_norm": 12.644274711608887, | |
| "learning_rate": 4.909473684210527e-06, | |
| "loss": 0.6122, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.2411347517730495, | |
| "grad_norm": 12.461518287658691, | |
| "learning_rate": 4.896315789473685e-06, | |
| "loss": 0.5522, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.2854609929078014, | |
| "grad_norm": 14.115540504455566, | |
| "learning_rate": 4.8831578947368425e-06, | |
| "loss": 0.5764, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.3297872340425532, | |
| "grad_norm": 13.1589994430542, | |
| "learning_rate": 4.87e-06, | |
| "loss": 0.5421, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.374113475177305, | |
| "grad_norm": 12.696803092956543, | |
| "learning_rate": 4.856842105263158e-06, | |
| "loss": 0.5616, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.4184397163120568, | |
| "grad_norm": 14.510184288024902, | |
| "learning_rate": 4.843684210526316e-06, | |
| "loss": 0.5725, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.4627659574468086, | |
| "grad_norm": 11.529364585876465, | |
| "learning_rate": 4.830526315789474e-06, | |
| "loss": 0.5627, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.5070921985815602, | |
| "grad_norm": 12.159563064575195, | |
| "learning_rate": 4.8173684210526324e-06, | |
| "loss": 0.5452, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.5514184397163122, | |
| "grad_norm": 10.232617378234863, | |
| "learning_rate": 4.80421052631579e-06, | |
| "loss": 0.5192, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.5957446808510638, | |
| "grad_norm": 10.81043529510498, | |
| "learning_rate": 4.791052631578948e-06, | |
| "loss": 0.5151, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.6400709219858156, | |
| "grad_norm": 14.97497272491455, | |
| "learning_rate": 4.777894736842106e-06, | |
| "loss": 0.5263, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 1.6843971631205674, | |
| "grad_norm": 14.701244354248047, | |
| "learning_rate": 4.764736842105264e-06, | |
| "loss": 0.524, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.728723404255319, | |
| "grad_norm": 13.469274520874023, | |
| "learning_rate": 4.7515789473684216e-06, | |
| "loss": 0.5084, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 1.773049645390071, | |
| "grad_norm": 11.251127243041992, | |
| "learning_rate": 4.738421052631579e-06, | |
| "loss": 0.5444, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.773049645390071, | |
| "eval_loss": 0.604159414768219, | |
| "eval_runtime": 587.0739, | |
| "eval_samples_per_second": 6.636, | |
| "eval_steps_per_second": 0.83, | |
| "eval_wer": 48.87994586701805, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.8173758865248226, | |
| "grad_norm": 13.032508850097656, | |
| "learning_rate": 4.725263157894737e-06, | |
| "loss": 0.5314, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 1.8617021276595744, | |
| "grad_norm": 12.22535228729248, | |
| "learning_rate": 4.712105263157895e-06, | |
| "loss": 0.4761, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.9060283687943262, | |
| "grad_norm": 15.19352912902832, | |
| "learning_rate": 4.698947368421053e-06, | |
| "loss": 0.4742, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 1.950354609929078, | |
| "grad_norm": 10.927416801452637, | |
| "learning_rate": 4.685789473684211e-06, | |
| "loss": 0.5012, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.9946808510638299, | |
| "grad_norm": 10.421246528625488, | |
| "learning_rate": 4.672631578947369e-06, | |
| "loss": 0.5225, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 2.0390070921985815, | |
| "grad_norm": 9.38261604309082, | |
| "learning_rate": 4.6594736842105265e-06, | |
| "loss": 0.4173, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.0833333333333335, | |
| "grad_norm": 10.129746437072754, | |
| "learning_rate": 4.646315789473684e-06, | |
| "loss": 0.4153, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 2.127659574468085, | |
| "grad_norm": 11.529908180236816, | |
| "learning_rate": 4.633157894736842e-06, | |
| "loss": 0.3981, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.171985815602837, | |
| "grad_norm": 11.076881408691406, | |
| "learning_rate": 4.620000000000001e-06, | |
| "loss": 0.4152, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 2.2163120567375887, | |
| "grad_norm": 9.994644165039062, | |
| "learning_rate": 4.606842105263158e-06, | |
| "loss": 0.3926, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.2606382978723403, | |
| "grad_norm": 10.582009315490723, | |
| "learning_rate": 4.5936842105263165e-06, | |
| "loss": 0.4378, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 2.3049645390070923, | |
| "grad_norm": 9.771284103393555, | |
| "learning_rate": 4.580526315789474e-06, | |
| "loss": 0.3882, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.349290780141844, | |
| "grad_norm": 11.286993026733398, | |
| "learning_rate": 4.567368421052632e-06, | |
| "loss": 0.3686, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 2.393617021276596, | |
| "grad_norm": 13.008705139160156, | |
| "learning_rate": 4.55421052631579e-06, | |
| "loss": 0.3899, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.4379432624113475, | |
| "grad_norm": 10.24173355102539, | |
| "learning_rate": 4.541052631578948e-06, | |
| "loss": 0.4028, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 2.482269503546099, | |
| "grad_norm": 10.5569486618042, | |
| "learning_rate": 4.527894736842106e-06, | |
| "loss": 0.3634, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.526595744680851, | |
| "grad_norm": 10.904850006103516, | |
| "learning_rate": 4.514736842105263e-06, | |
| "loss": 0.4181, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 2.5709219858156027, | |
| "grad_norm": 10.719099998474121, | |
| "learning_rate": 4.501578947368421e-06, | |
| "loss": 0.3803, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.6152482269503547, | |
| "grad_norm": 10.873899459838867, | |
| "learning_rate": 4.488421052631579e-06, | |
| "loss": 0.3983, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 2.6595744680851063, | |
| "grad_norm": 8.574480056762695, | |
| "learning_rate": 4.475263157894737e-06, | |
| "loss": 0.3894, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.703900709219858, | |
| "grad_norm": 10.148545265197754, | |
| "learning_rate": 4.462105263157895e-06, | |
| "loss": 0.3672, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 2.74822695035461, | |
| "grad_norm": 11.479018211364746, | |
| "learning_rate": 4.448947368421053e-06, | |
| "loss": 0.3824, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.7925531914893615, | |
| "grad_norm": 10.652966499328613, | |
| "learning_rate": 4.435789473684211e-06, | |
| "loss": 0.3849, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 2.8368794326241136, | |
| "grad_norm": 10.057666778564453, | |
| "learning_rate": 4.422631578947369e-06, | |
| "loss": 0.3702, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.881205673758865, | |
| "grad_norm": 9.985100746154785, | |
| "learning_rate": 4.409473684210527e-06, | |
| "loss": 0.4052, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 2.925531914893617, | |
| "grad_norm": 9.165911674499512, | |
| "learning_rate": 4.396315789473685e-06, | |
| "loss": 0.3697, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.969858156028369, | |
| "grad_norm": 10.057464599609375, | |
| "learning_rate": 4.383157894736842e-06, | |
| "loss": 0.3663, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 3.0141843971631204, | |
| "grad_norm": 10.039346694946289, | |
| "learning_rate": 4.3700000000000005e-06, | |
| "loss": 0.3443, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.0585106382978724, | |
| "grad_norm": 9.51621150970459, | |
| "learning_rate": 4.356842105263158e-06, | |
| "loss": 0.2917, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 3.102836879432624, | |
| "grad_norm": 10.015137672424316, | |
| "learning_rate": 4.343684210526316e-06, | |
| "loss": 0.3045, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 3.147163120567376, | |
| "grad_norm": 10.268891334533691, | |
| "learning_rate": 4.330526315789474e-06, | |
| "loss": 0.2821, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 3.1914893617021276, | |
| "grad_norm": 9.120494842529297, | |
| "learning_rate": 4.317368421052632e-06, | |
| "loss": 0.2956, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.2358156028368796, | |
| "grad_norm": 10.420275688171387, | |
| "learning_rate": 4.30421052631579e-06, | |
| "loss": 0.3086, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 3.280141843971631, | |
| "grad_norm": 9.254629135131836, | |
| "learning_rate": 4.291052631578947e-06, | |
| "loss": 0.3083, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 3.324468085106383, | |
| "grad_norm": 9.865363121032715, | |
| "learning_rate": 4.277894736842106e-06, | |
| "loss": 0.3071, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 3.368794326241135, | |
| "grad_norm": 11.790287017822266, | |
| "learning_rate": 4.264736842105264e-06, | |
| "loss": 0.3014, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 3.4131205673758864, | |
| "grad_norm": 10.183505058288574, | |
| "learning_rate": 4.251578947368421e-06, | |
| "loss": 0.293, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 3.4574468085106385, | |
| "grad_norm": 9.69072151184082, | |
| "learning_rate": 4.23842105263158e-06, | |
| "loss": 0.2978, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 3.50177304964539, | |
| "grad_norm": 10.96455192565918, | |
| "learning_rate": 4.225263157894737e-06, | |
| "loss": 0.311, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 3.546099290780142, | |
| "grad_norm": 11.342255592346191, | |
| "learning_rate": 4.212105263157895e-06, | |
| "loss": 0.3042, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.546099290780142, | |
| "eval_loss": 0.5099755525588989, | |
| "eval_runtime": 581.1553, | |
| "eval_samples_per_second": 6.704, | |
| "eval_steps_per_second": 0.838, | |
| "eval_wer": 41.17311870080843, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.5904255319148937, | |
| "grad_norm": 10.30219554901123, | |
| "learning_rate": 4.198947368421053e-06, | |
| "loss": 0.3039, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 3.6347517730496453, | |
| "grad_norm": 9.825774192810059, | |
| "learning_rate": 4.185789473684211e-06, | |
| "loss": 0.292, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 3.6790780141843973, | |
| "grad_norm": 9.612593650817871, | |
| "learning_rate": 4.172631578947369e-06, | |
| "loss": 0.3133, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 3.723404255319149, | |
| "grad_norm": 9.864873886108398, | |
| "learning_rate": 4.159473684210526e-06, | |
| "loss": 0.2786, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 3.7677304964539005, | |
| "grad_norm": 9.14278507232666, | |
| "learning_rate": 4.1463157894736845e-06, | |
| "loss": 0.2992, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 3.8120567375886525, | |
| "grad_norm": 10.981643676757812, | |
| "learning_rate": 4.133157894736842e-06, | |
| "loss": 0.2987, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 3.8563829787234045, | |
| "grad_norm": 10.71380615234375, | |
| "learning_rate": 4.12e-06, | |
| "loss": 0.3088, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 3.900709219858156, | |
| "grad_norm": 11.365142822265625, | |
| "learning_rate": 4.106842105263158e-06, | |
| "loss": 0.302, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 3.9450354609929077, | |
| "grad_norm": 11.918941497802734, | |
| "learning_rate": 4.093684210526316e-06, | |
| "loss": 0.31, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 3.9893617021276597, | |
| "grad_norm": 10.240377426147461, | |
| "learning_rate": 4.0805263157894745e-06, | |
| "loss": 0.3048, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 4.033687943262412, | |
| "grad_norm": 7.562131404876709, | |
| "learning_rate": 4.067368421052632e-06, | |
| "loss": 0.2673, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 4.078014184397163, | |
| "grad_norm": 7.741388320922852, | |
| "learning_rate": 4.05421052631579e-06, | |
| "loss": 0.2364, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 4.122340425531915, | |
| "grad_norm": 8.797900199890137, | |
| "learning_rate": 4.041052631578948e-06, | |
| "loss": 0.2524, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 4.166666666666667, | |
| "grad_norm": 9.751541137695312, | |
| "learning_rate": 4.027894736842105e-06, | |
| "loss": 0.2565, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 4.210992907801418, | |
| "grad_norm": 7.299990653991699, | |
| "learning_rate": 4.014736842105264e-06, | |
| "loss": 0.2405, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 4.25531914893617, | |
| "grad_norm": 8.407694816589355, | |
| "learning_rate": 4.001578947368421e-06, | |
| "loss": 0.226, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 4.299645390070922, | |
| "grad_norm": 8.874945640563965, | |
| "learning_rate": 3.9884210526315795e-06, | |
| "loss": 0.2366, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 4.343971631205674, | |
| "grad_norm": 8.172481536865234, | |
| "learning_rate": 3.975263157894737e-06, | |
| "loss": 0.2325, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 4.388297872340425, | |
| "grad_norm": 8.563679695129395, | |
| "learning_rate": 3.962105263157895e-06, | |
| "loss": 0.2266, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 4.432624113475177, | |
| "grad_norm": 7.646442413330078, | |
| "learning_rate": 3.948947368421053e-06, | |
| "loss": 0.2477, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 4.476950354609929, | |
| "grad_norm": 8.14061164855957, | |
| "learning_rate": 3.93578947368421e-06, | |
| "loss": 0.2372, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 4.5212765957446805, | |
| "grad_norm": 6.697457790374756, | |
| "learning_rate": 3.9226315789473694e-06, | |
| "loss": 0.2312, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 4.565602836879433, | |
| "grad_norm": 9.178577423095703, | |
| "learning_rate": 3.909473684210527e-06, | |
| "loss": 0.239, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 4.609929078014185, | |
| "grad_norm": 7.986817836761475, | |
| "learning_rate": 3.896315789473684e-06, | |
| "loss": 0.2266, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 4.654255319148936, | |
| "grad_norm": 9.992223739624023, | |
| "learning_rate": 3.883157894736843e-06, | |
| "loss": 0.2422, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 4.698581560283688, | |
| "grad_norm": 8.259024620056152, | |
| "learning_rate": 3.87e-06, | |
| "loss": 0.2382, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 4.74290780141844, | |
| "grad_norm": 8.913894653320312, | |
| "learning_rate": 3.8568421052631585e-06, | |
| "loss": 0.2404, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 4.787234042553192, | |
| "grad_norm": 8.490303993225098, | |
| "learning_rate": 3.843684210526316e-06, | |
| "loss": 0.251, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 4.831560283687943, | |
| "grad_norm": 8.170136451721191, | |
| "learning_rate": 3.830526315789474e-06, | |
| "loss": 0.2195, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 4.875886524822695, | |
| "grad_norm": 7.071116924285889, | |
| "learning_rate": 3.817368421052632e-06, | |
| "loss": 0.2237, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 4.920212765957447, | |
| "grad_norm": 7.995920181274414, | |
| "learning_rate": 3.8042105263157898e-06, | |
| "loss": 0.2377, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 4.964539007092198, | |
| "grad_norm": 8.777873039245605, | |
| "learning_rate": 3.7910526315789477e-06, | |
| "loss": 0.2211, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 5.00886524822695, | |
| "grad_norm": 7.402454376220703, | |
| "learning_rate": 3.7778947368421056e-06, | |
| "loss": 0.2341, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 5.053191489361702, | |
| "grad_norm": 7.576868057250977, | |
| "learning_rate": 3.764736842105263e-06, | |
| "loss": 0.2006, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 5.097517730496454, | |
| "grad_norm": 6.49124002456665, | |
| "learning_rate": 3.751578947368421e-06, | |
| "loss": 0.1835, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 5.141843971631205, | |
| "grad_norm": 5.912723064422607, | |
| "learning_rate": 3.7384210526315793e-06, | |
| "loss": 0.1962, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 5.1861702127659575, | |
| "grad_norm": 7.608515739440918, | |
| "learning_rate": 3.7252631578947372e-06, | |
| "loss": 0.18, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 5.2304964539007095, | |
| "grad_norm": 8.52009105682373, | |
| "learning_rate": 3.712105263157895e-06, | |
| "loss": 0.184, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 5.274822695035461, | |
| "grad_norm": 9.251614570617676, | |
| "learning_rate": 3.698947368421053e-06, | |
| "loss": 0.2098, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 5.319148936170213, | |
| "grad_norm": 8.74129581451416, | |
| "learning_rate": 3.685789473684211e-06, | |
| "loss": 0.1817, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 5.319148936170213, | |
| "eval_loss": 0.4850045144557953, | |
| "eval_runtime": 588.1549, | |
| "eval_samples_per_second": 6.624, | |
| "eval_steps_per_second": 0.828, | |
| "eval_wer": 40.756437195056805, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 5.363475177304965, | |
| "grad_norm": 9.787571907043457, | |
| "learning_rate": 3.672631578947369e-06, | |
| "loss": 0.2017, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 5.407801418439716, | |
| "grad_norm": 9.773175239562988, | |
| "learning_rate": 3.6594736842105268e-06, | |
| "loss": 0.1818, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 5.452127659574468, | |
| "grad_norm": 9.025221824645996, | |
| "learning_rate": 3.6463157894736847e-06, | |
| "loss": 0.1926, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 5.49645390070922, | |
| "grad_norm": 7.631556987762451, | |
| "learning_rate": 3.633157894736842e-06, | |
| "loss": 0.2017, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 5.540780141843972, | |
| "grad_norm": 8.103202819824219, | |
| "learning_rate": 3.62e-06, | |
| "loss": 0.1808, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 5.585106382978723, | |
| "grad_norm": 6.022019386291504, | |
| "learning_rate": 3.606842105263158e-06, | |
| "loss": 0.2017, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 5.629432624113475, | |
| "grad_norm": 6.922440528869629, | |
| "learning_rate": 3.593684210526316e-06, | |
| "loss": 0.2009, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 5.673758865248227, | |
| "grad_norm": 8.611794471740723, | |
| "learning_rate": 3.580526315789474e-06, | |
| "loss": 0.1731, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 5.718085106382979, | |
| "grad_norm": 7.4870147705078125, | |
| "learning_rate": 3.567368421052632e-06, | |
| "loss": 0.1797, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 5.76241134751773, | |
| "grad_norm": 8.757158279418945, | |
| "learning_rate": 3.55421052631579e-06, | |
| "loss": 0.1847, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 5.806737588652482, | |
| "grad_norm": 8.361138343811035, | |
| "learning_rate": 3.541052631578948e-06, | |
| "loss": 0.1738, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 5.851063829787234, | |
| "grad_norm": 8.07181453704834, | |
| "learning_rate": 3.527894736842106e-06, | |
| "loss": 0.2007, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 5.8953900709219855, | |
| "grad_norm": 7.998460292816162, | |
| "learning_rate": 3.5147368421052638e-06, | |
| "loss": 0.1848, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 5.939716312056738, | |
| "grad_norm": 7.463223934173584, | |
| "learning_rate": 3.5015789473684213e-06, | |
| "loss": 0.1894, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 5.98404255319149, | |
| "grad_norm": 8.993099212646484, | |
| "learning_rate": 3.488421052631579e-06, | |
| "loss": 0.1898, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 6.028368794326241, | |
| "grad_norm": 6.37155294418335, | |
| "learning_rate": 3.475263157894737e-06, | |
| "loss": 0.1772, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 6.072695035460993, | |
| "grad_norm": 6.82436990737915, | |
| "learning_rate": 3.462105263157895e-06, | |
| "loss": 0.1555, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 6.117021276595745, | |
| "grad_norm": 7.0470428466796875, | |
| "learning_rate": 3.448947368421053e-06, | |
| "loss": 0.1517, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 6.161347517730497, | |
| "grad_norm": 6.5624494552612305, | |
| "learning_rate": 3.435789473684211e-06, | |
| "loss": 0.149, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 6.205673758865248, | |
| "grad_norm": 7.491029262542725, | |
| "learning_rate": 3.4226315789473687e-06, | |
| "loss": 0.1599, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "grad_norm": 6.855647563934326, | |
| "learning_rate": 3.409473684210526e-06, | |
| "loss": 0.1587, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 6.294326241134752, | |
| "grad_norm": 8.074361801147461, | |
| "learning_rate": 3.396315789473684e-06, | |
| "loss": 0.1674, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 6.338652482269503, | |
| "grad_norm": 5.963619709014893, | |
| "learning_rate": 3.3831578947368424e-06, | |
| "loss": 0.1499, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 6.382978723404255, | |
| "grad_norm": 8.021512985229492, | |
| "learning_rate": 3.3700000000000003e-06, | |
| "loss": 0.1488, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 6.427304964539007, | |
| "grad_norm": 5.919581413269043, | |
| "learning_rate": 3.3568421052631583e-06, | |
| "loss": 0.1498, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 6.471631205673759, | |
| "grad_norm": 6.950247287750244, | |
| "learning_rate": 3.343684210526316e-06, | |
| "loss": 0.1648, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 6.51595744680851, | |
| "grad_norm": 6.72702693939209, | |
| "learning_rate": 3.330526315789474e-06, | |
| "loss": 0.146, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 6.560283687943262, | |
| "grad_norm": 7.681860446929932, | |
| "learning_rate": 3.317368421052632e-06, | |
| "loss": 0.1542, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 6.6046099290780145, | |
| "grad_norm": 7.239710330963135, | |
| "learning_rate": 3.30421052631579e-06, | |
| "loss": 0.1499, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 6.648936170212766, | |
| "grad_norm": 7.310706615447998, | |
| "learning_rate": 3.291052631578948e-06, | |
| "loss": 0.1527, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 6.693262411347518, | |
| "grad_norm": 7.060523509979248, | |
| "learning_rate": 3.2778947368421053e-06, | |
| "loss": 0.1537, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 6.73758865248227, | |
| "grad_norm": 8.571366310119629, | |
| "learning_rate": 3.264736842105263e-06, | |
| "loss": 0.1598, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 6.781914893617021, | |
| "grad_norm": 6.046979904174805, | |
| "learning_rate": 3.251578947368421e-06, | |
| "loss": 0.1416, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 6.826241134751773, | |
| "grad_norm": 8.620864868164062, | |
| "learning_rate": 3.238421052631579e-06, | |
| "loss": 0.1454, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 6.870567375886525, | |
| "grad_norm": 8.330490112304688, | |
| "learning_rate": 3.225263157894737e-06, | |
| "loss": 0.1542, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 6.914893617021277, | |
| "grad_norm": 10.611557006835938, | |
| "learning_rate": 3.212105263157895e-06, | |
| "loss": 0.1676, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 6.959219858156028, | |
| "grad_norm": 6.815483570098877, | |
| "learning_rate": 3.198947368421053e-06, | |
| "loss": 0.1405, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 7.00354609929078, | |
| "grad_norm": 5.8308634757995605, | |
| "learning_rate": 3.185789473684211e-06, | |
| "loss": 0.1479, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 7.047872340425532, | |
| "grad_norm": 6.529901027679443, | |
| "learning_rate": 3.172631578947369e-06, | |
| "loss": 0.12, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 7.092198581560283, | |
| "grad_norm": 6.525743007659912, | |
| "learning_rate": 3.159473684210527e-06, | |
| "loss": 0.1214, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 7.092198581560283, | |
| "eval_loss": 0.48074454069137573, | |
| "eval_runtime": 589.8236, | |
| "eval_samples_per_second": 6.605, | |
| "eval_steps_per_second": 0.826, | |
| "eval_wer": 41.810605790804516, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 7.136524822695035, | |
| "grad_norm": 5.498377799987793, | |
| "learning_rate": 3.1463157894736844e-06, | |
| "loss": 0.1295, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 7.180851063829787, | |
| "grad_norm": 7.085293292999268, | |
| "learning_rate": 3.1331578947368423e-06, | |
| "loss": 0.1224, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 7.225177304964539, | |
| "grad_norm": 4.779361724853516, | |
| "learning_rate": 3.12e-06, | |
| "loss": 0.1245, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 7.2695035460992905, | |
| "grad_norm": 6.457351207733154, | |
| "learning_rate": 3.106842105263158e-06, | |
| "loss": 0.1192, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 7.3138297872340425, | |
| "grad_norm": 4.801368236541748, | |
| "learning_rate": 3.093684210526316e-06, | |
| "loss": 0.1322, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 7.358156028368795, | |
| "grad_norm": 6.449742794036865, | |
| "learning_rate": 3.080526315789474e-06, | |
| "loss": 0.1218, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 7.402482269503546, | |
| "grad_norm": 5.5234456062316895, | |
| "learning_rate": 3.067368421052632e-06, | |
| "loss": 0.1293, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 7.446808510638298, | |
| "grad_norm": 8.489788055419922, | |
| "learning_rate": 3.0542105263157893e-06, | |
| "loss": 0.1181, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 7.49113475177305, | |
| "grad_norm": 6.528730869293213, | |
| "learning_rate": 3.0410526315789472e-06, | |
| "loss": 0.1263, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 7.535460992907802, | |
| "grad_norm": 6.973687648773193, | |
| "learning_rate": 3.027894736842106e-06, | |
| "loss": 0.1179, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 7.579787234042553, | |
| "grad_norm": 6.633789539337158, | |
| "learning_rate": 3.0147368421052635e-06, | |
| "loss": 0.1314, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 7.624113475177305, | |
| "grad_norm": 7.518368721008301, | |
| "learning_rate": 3.0015789473684214e-06, | |
| "loss": 0.1305, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 7.668439716312057, | |
| "grad_norm": 5.739889144897461, | |
| "learning_rate": 2.9884210526315793e-06, | |
| "loss": 0.1295, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 7.712765957446808, | |
| "grad_norm": 6.737969398498535, | |
| "learning_rate": 2.975263157894737e-06, | |
| "loss": 0.1256, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 7.75709219858156, | |
| "grad_norm": 5.012901782989502, | |
| "learning_rate": 2.962105263157895e-06, | |
| "loss": 0.1312, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 7.801418439716312, | |
| "grad_norm": 5.5256853103637695, | |
| "learning_rate": 2.948947368421053e-06, | |
| "loss": 0.1242, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 7.845744680851064, | |
| "grad_norm": 6.629995346069336, | |
| "learning_rate": 2.935789473684211e-06, | |
| "loss": 0.125, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 7.890070921985815, | |
| "grad_norm": 5.227272987365723, | |
| "learning_rate": 2.9226315789473684e-06, | |
| "loss": 0.1191, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 7.934397163120567, | |
| "grad_norm": 5.111964225769043, | |
| "learning_rate": 2.9094736842105263e-06, | |
| "loss": 0.1145, | |
| "step": 4475 | |
| }, | |
| { | |
| "epoch": 7.9787234042553195, | |
| "grad_norm": 5.537423610687256, | |
| "learning_rate": 2.8963157894736842e-06, | |
| "loss": 0.1278, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 8.02304964539007, | |
| "grad_norm": 4.478297710418701, | |
| "learning_rate": 2.883157894736842e-06, | |
| "loss": 0.1155, | |
| "step": 4525 | |
| }, | |
| { | |
| "epoch": 8.067375886524824, | |
| "grad_norm": 5.642357349395752, | |
| "learning_rate": 2.87e-06, | |
| "loss": 0.1102, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 8.111702127659575, | |
| "grad_norm": 5.228881359100342, | |
| "learning_rate": 2.856842105263158e-06, | |
| "loss": 0.1009, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 8.156028368794326, | |
| "grad_norm": 5.18090295791626, | |
| "learning_rate": 2.8436842105263163e-06, | |
| "loss": 0.0992, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 8.200354609929079, | |
| "grad_norm": 6.664114475250244, | |
| "learning_rate": 2.830526315789474e-06, | |
| "loss": 0.1092, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 8.24468085106383, | |
| "grad_norm": 5.435600280761719, | |
| "learning_rate": 2.817368421052632e-06, | |
| "loss": 0.1011, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 8.289007092198581, | |
| "grad_norm": 5.778509140014648, | |
| "learning_rate": 2.80421052631579e-06, | |
| "loss": 0.1025, | |
| "step": 4675 | |
| }, | |
| { | |
| "epoch": 8.333333333333334, | |
| "grad_norm": 5.977304458618164, | |
| "learning_rate": 2.7910526315789475e-06, | |
| "loss": 0.1021, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 8.377659574468085, | |
| "grad_norm": 5.274112701416016, | |
| "learning_rate": 2.7778947368421054e-06, | |
| "loss": 0.0935, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 8.421985815602836, | |
| "grad_norm": 5.418082237243652, | |
| "learning_rate": 2.7647368421052633e-06, | |
| "loss": 0.0965, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 8.46631205673759, | |
| "grad_norm": 5.248587131500244, | |
| "learning_rate": 2.7515789473684212e-06, | |
| "loss": 0.1014, | |
| "step": 4775 | |
| }, | |
| { | |
| "epoch": 8.51063829787234, | |
| "grad_norm": 5.329669952392578, | |
| "learning_rate": 2.738421052631579e-06, | |
| "loss": 0.1052, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 8.554964539007091, | |
| "grad_norm": 5.469305038452148, | |
| "learning_rate": 2.725263157894737e-06, | |
| "loss": 0.0988, | |
| "step": 4825 | |
| }, | |
| { | |
| "epoch": 8.599290780141844, | |
| "grad_norm": 4.839619159698486, | |
| "learning_rate": 2.712105263157895e-06, | |
| "loss": 0.1038, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 8.643617021276595, | |
| "grad_norm": 5.6988420486450195, | |
| "learning_rate": 2.6989473684210524e-06, | |
| "loss": 0.1005, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 8.687943262411348, | |
| "grad_norm": 6.122032165527344, | |
| "learning_rate": 2.6857894736842104e-06, | |
| "loss": 0.1071, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 8.7322695035461, | |
| "grad_norm": 5.00734806060791, | |
| "learning_rate": 2.672631578947369e-06, | |
| "loss": 0.1009, | |
| "step": 4925 | |
| }, | |
| { | |
| "epoch": 8.77659574468085, | |
| "grad_norm": 6.199928283691406, | |
| "learning_rate": 2.6594736842105266e-06, | |
| "loss": 0.1041, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 8.820921985815604, | |
| "grad_norm": 6.134685516357422, | |
| "learning_rate": 2.6463157894736845e-06, | |
| "loss": 0.1044, | |
| "step": 4975 | |
| }, | |
| { | |
| "epoch": 8.865248226950355, | |
| "grad_norm": 5.176562786102295, | |
| "learning_rate": 2.6331578947368424e-06, | |
| "loss": 0.1066, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 8.865248226950355, | |
| "eval_loss": 0.4846879839897156, | |
| "eval_runtime": 580.4904, | |
| "eval_samples_per_second": 6.712, | |
| "eval_steps_per_second": 0.839, | |
| "eval_wer": 37.96787634887283, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 8.909574468085106, | |
| "grad_norm": 5.835010051727295, | |
| "learning_rate": 2.6200000000000003e-06, | |
| "loss": 0.1001, | |
| "step": 5025 | |
| }, | |
| { | |
| "epoch": 8.953900709219859, | |
| "grad_norm": 6.407568454742432, | |
| "learning_rate": 2.6068421052631582e-06, | |
| "loss": 0.0965, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 8.99822695035461, | |
| "grad_norm": 6.197821617126465, | |
| "learning_rate": 2.593684210526316e-06, | |
| "loss": 0.1048, | |
| "step": 5075 | |
| }, | |
| { | |
| "epoch": 9.042553191489361, | |
| "grad_norm": 4.808340072631836, | |
| "learning_rate": 2.580526315789474e-06, | |
| "loss": 0.085, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 9.086879432624114, | |
| "grad_norm": 4.098535060882568, | |
| "learning_rate": 2.5673684210526315e-06, | |
| "loss": 0.0815, | |
| "step": 5125 | |
| }, | |
| { | |
| "epoch": 9.131205673758865, | |
| "grad_norm": 4.34876012802124, | |
| "learning_rate": 2.5542105263157894e-06, | |
| "loss": 0.0884, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 9.175531914893616, | |
| "grad_norm": 4.318136692047119, | |
| "learning_rate": 2.5410526315789474e-06, | |
| "loss": 0.0826, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 9.21985815602837, | |
| "grad_norm": 5.460968017578125, | |
| "learning_rate": 2.5278947368421053e-06, | |
| "loss": 0.0799, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 9.26418439716312, | |
| "grad_norm": 4.200242042541504, | |
| "learning_rate": 2.514736842105263e-06, | |
| "loss": 0.0835, | |
| "step": 5225 | |
| }, | |
| { | |
| "epoch": 9.308510638297872, | |
| "grad_norm": 5.984395503997803, | |
| "learning_rate": 2.501578947368421e-06, | |
| "loss": 0.0881, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 9.352836879432624, | |
| "grad_norm": 4.829773426055908, | |
| "learning_rate": 2.488421052631579e-06, | |
| "loss": 0.0786, | |
| "step": 5275 | |
| }, | |
| { | |
| "epoch": 9.397163120567376, | |
| "grad_norm": 3.996610403060913, | |
| "learning_rate": 2.475263157894737e-06, | |
| "loss": 0.0865, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 9.441489361702128, | |
| "grad_norm": 6.336328029632568, | |
| "learning_rate": 2.462105263157895e-06, | |
| "loss": 0.0802, | |
| "step": 5325 | |
| }, | |
| { | |
| "epoch": 9.48581560283688, | |
| "grad_norm": 5.054424285888672, | |
| "learning_rate": 2.448947368421053e-06, | |
| "loss": 0.0928, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 9.53014184397163, | |
| "grad_norm": 6.595405101776123, | |
| "learning_rate": 2.4357894736842106e-06, | |
| "loss": 0.0858, | |
| "step": 5375 | |
| }, | |
| { | |
| "epoch": 9.574468085106384, | |
| "grad_norm": 5.797497272491455, | |
| "learning_rate": 2.4226315789473685e-06, | |
| "loss": 0.0846, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 9.618794326241135, | |
| "grad_norm": 5.1372551918029785, | |
| "learning_rate": 2.4094736842105265e-06, | |
| "loss": 0.0789, | |
| "step": 5425 | |
| }, | |
| { | |
| "epoch": 9.663120567375886, | |
| "grad_norm": 6.663181304931641, | |
| "learning_rate": 2.3963157894736844e-06, | |
| "loss": 0.0977, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 9.707446808510639, | |
| "grad_norm": 4.910397529602051, | |
| "learning_rate": 2.3831578947368423e-06, | |
| "loss": 0.087, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 9.75177304964539, | |
| "grad_norm": 5.87327241897583, | |
| "learning_rate": 2.37e-06, | |
| "loss": 0.076, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 9.796099290780141, | |
| "grad_norm": 4.994716167449951, | |
| "learning_rate": 2.356842105263158e-06, | |
| "loss": 0.083, | |
| "step": 5525 | |
| }, | |
| { | |
| "epoch": 9.840425531914894, | |
| "grad_norm": 5.601754665374756, | |
| "learning_rate": 2.343684210526316e-06, | |
| "loss": 0.0819, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 9.884751773049645, | |
| "grad_norm": 5.767611980438232, | |
| "learning_rate": 2.330526315789474e-06, | |
| "loss": 0.0831, | |
| "step": 5575 | |
| }, | |
| { | |
| "epoch": 9.929078014184396, | |
| "grad_norm": 6.679659366607666, | |
| "learning_rate": 2.317368421052632e-06, | |
| "loss": 0.084, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 9.97340425531915, | |
| "grad_norm": 3.7785329818725586, | |
| "learning_rate": 2.3042105263157897e-06, | |
| "loss": 0.0894, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 10.0177304964539, | |
| "grad_norm": 4.858386516571045, | |
| "learning_rate": 2.2910526315789476e-06, | |
| "loss": 0.0817, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 10.062056737588652, | |
| "grad_norm": 3.9162485599517822, | |
| "learning_rate": 2.277894736842105e-06, | |
| "loss": 0.0669, | |
| "step": 5675 | |
| }, | |
| { | |
| "epoch": 10.106382978723405, | |
| "grad_norm": 5.168406963348389, | |
| "learning_rate": 2.2647368421052635e-06, | |
| "loss": 0.0677, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 10.150709219858156, | |
| "grad_norm": 3.850172758102417, | |
| "learning_rate": 2.2515789473684214e-06, | |
| "loss": 0.0687, | |
| "step": 5725 | |
| }, | |
| { | |
| "epoch": 10.195035460992909, | |
| "grad_norm": 3.854781150817871, | |
| "learning_rate": 2.2384210526315793e-06, | |
| "loss": 0.0734, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 10.23936170212766, | |
| "grad_norm": 3.807837724685669, | |
| "learning_rate": 2.225263157894737e-06, | |
| "loss": 0.0654, | |
| "step": 5775 | |
| }, | |
| { | |
| "epoch": 10.28368794326241, | |
| "grad_norm": 6.461479187011719, | |
| "learning_rate": 2.2121052631578947e-06, | |
| "loss": 0.0692, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 10.328014184397164, | |
| "grad_norm": 5.051649570465088, | |
| "learning_rate": 2.1989473684210526e-06, | |
| "loss": 0.0705, | |
| "step": 5825 | |
| }, | |
| { | |
| "epoch": 10.372340425531915, | |
| "grad_norm": 4.43517541885376, | |
| "learning_rate": 2.1857894736842105e-06, | |
| "loss": 0.0738, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 10.416666666666666, | |
| "grad_norm": 3.5894205570220947, | |
| "learning_rate": 2.172631578947369e-06, | |
| "loss": 0.0699, | |
| "step": 5875 | |
| }, | |
| { | |
| "epoch": 10.460992907801419, | |
| "grad_norm": 4.5283203125, | |
| "learning_rate": 2.1594736842105267e-06, | |
| "loss": 0.0688, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 10.50531914893617, | |
| "grad_norm": 3.9678380489349365, | |
| "learning_rate": 2.1463157894736842e-06, | |
| "loss": 0.0678, | |
| "step": 5925 | |
| }, | |
| { | |
| "epoch": 10.549645390070921, | |
| "grad_norm": 6.319568157196045, | |
| "learning_rate": 2.133157894736842e-06, | |
| "loss": 0.0716, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 10.593971631205674, | |
| "grad_norm": 4.849029541015625, | |
| "learning_rate": 2.12e-06, | |
| "loss": 0.0746, | |
| "step": 5975 | |
| }, | |
| { | |
| "epoch": 10.638297872340425, | |
| "grad_norm": 3.9395298957824707, | |
| "learning_rate": 2.106842105263158e-06, | |
| "loss": 0.0673, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 10.638297872340425, | |
| "eval_loss": 0.4972631335258484, | |
| "eval_runtime": 583.012, | |
| "eval_samples_per_second": 6.683, | |
| "eval_steps_per_second": 0.835, | |
| "eval_wer": 39.709391360091175, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 10.682624113475176, | |
| "grad_norm": 4.806758880615234, | |
| "learning_rate": 2.0936842105263163e-06, | |
| "loss": 0.069, | |
| "step": 6025 | |
| }, | |
| { | |
| "epoch": 10.72695035460993, | |
| "grad_norm": 5.572425842285156, | |
| "learning_rate": 2.0805263157894738e-06, | |
| "loss": 0.0699, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 10.77127659574468, | |
| "grad_norm": 4.8004302978515625, | |
| "learning_rate": 2.0673684210526317e-06, | |
| "loss": 0.0712, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 10.815602836879432, | |
| "grad_norm": 4.476444721221924, | |
| "learning_rate": 2.0547368421052633e-06, | |
| "loss": 0.0746, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 10.859929078014185, | |
| "grad_norm": 4.727671146392822, | |
| "learning_rate": 2.0415789473684213e-06, | |
| "loss": 0.073, | |
| "step": 6125 | |
| }, | |
| { | |
| "epoch": 10.904255319148936, | |
| "grad_norm": 4.828220367431641, | |
| "learning_rate": 2.028421052631579e-06, | |
| "loss": 0.0698, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 10.948581560283689, | |
| "grad_norm": 5.050329685211182, | |
| "learning_rate": 2.015263157894737e-06, | |
| "loss": 0.0705, | |
| "step": 6175 | |
| }, | |
| { | |
| "epoch": 10.99290780141844, | |
| "grad_norm": 4.282689094543457, | |
| "learning_rate": 2.002105263157895e-06, | |
| "loss": 0.0718, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 11.037234042553191, | |
| "grad_norm": 4.424275875091553, | |
| "learning_rate": 1.988947368421053e-06, | |
| "loss": 0.0636, | |
| "step": 6225 | |
| }, | |
| { | |
| "epoch": 11.081560283687944, | |
| "grad_norm": 3.237255573272705, | |
| "learning_rate": 1.975789473684211e-06, | |
| "loss": 0.0544, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 11.125886524822695, | |
| "grad_norm": 3.363708972930908, | |
| "learning_rate": 1.9626315789473683e-06, | |
| "loss": 0.0582, | |
| "step": 6275 | |
| }, | |
| { | |
| "epoch": 11.170212765957446, | |
| "grad_norm": 4.743597030639648, | |
| "learning_rate": 1.949473684210526e-06, | |
| "loss": 0.0567, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 11.214539007092199, | |
| "grad_norm": 4.212203502655029, | |
| "learning_rate": 1.9363157894736845e-06, | |
| "loss": 0.0643, | |
| "step": 6325 | |
| }, | |
| { | |
| "epoch": 11.25886524822695, | |
| "grad_norm": 3.580488443374634, | |
| "learning_rate": 1.9231578947368424e-06, | |
| "loss": 0.057, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 11.303191489361701, | |
| "grad_norm": 3.409921646118164, | |
| "learning_rate": 1.9100000000000003e-06, | |
| "loss": 0.0567, | |
| "step": 6375 | |
| }, | |
| { | |
| "epoch": 11.347517730496454, | |
| "grad_norm": 3.3070523738861084, | |
| "learning_rate": 1.896842105263158e-06, | |
| "loss": 0.0562, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 11.391843971631205, | |
| "grad_norm": 4.054013252258301, | |
| "learning_rate": 1.883684210526316e-06, | |
| "loss": 0.0645, | |
| "step": 6425 | |
| }, | |
| { | |
| "epoch": 11.436170212765958, | |
| "grad_norm": 3.5053253173828125, | |
| "learning_rate": 1.8705263157894737e-06, | |
| "loss": 0.0632, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 11.48049645390071, | |
| "grad_norm": 3.654541492462158, | |
| "learning_rate": 1.8573684210526316e-06, | |
| "loss": 0.0586, | |
| "step": 6475 | |
| }, | |
| { | |
| "epoch": 11.52482269503546, | |
| "grad_norm": 4.121072769165039, | |
| "learning_rate": 1.8442105263157897e-06, | |
| "loss": 0.061, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 11.569148936170214, | |
| "grad_norm": 4.159468173980713, | |
| "learning_rate": 1.8310526315789476e-06, | |
| "loss": 0.0611, | |
| "step": 6525 | |
| }, | |
| { | |
| "epoch": 11.613475177304965, | |
| "grad_norm": 4.2946672439575195, | |
| "learning_rate": 1.8178947368421055e-06, | |
| "loss": 0.0609, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 11.657801418439716, | |
| "grad_norm": 3.857961416244507, | |
| "learning_rate": 1.8047368421052632e-06, | |
| "loss": 0.0613, | |
| "step": 6575 | |
| }, | |
| { | |
| "epoch": 11.702127659574469, | |
| "grad_norm": 3.1902592182159424, | |
| "learning_rate": 1.7915789473684211e-06, | |
| "loss": 0.0607, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 11.74645390070922, | |
| "grad_norm": 5.016479015350342, | |
| "learning_rate": 1.778421052631579e-06, | |
| "loss": 0.0586, | |
| "step": 6625 | |
| }, | |
| { | |
| "epoch": 11.790780141843971, | |
| "grad_norm": 4.0299601554870605, | |
| "learning_rate": 1.7652631578947371e-06, | |
| "loss": 0.059, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 11.835106382978724, | |
| "grad_norm": 4.405561447143555, | |
| "learning_rate": 1.752105263157895e-06, | |
| "loss": 0.0623, | |
| "step": 6675 | |
| }, | |
| { | |
| "epoch": 11.879432624113475, | |
| "grad_norm": 3.684788465499878, | |
| "learning_rate": 1.7389473684210527e-06, | |
| "loss": 0.0614, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 11.923758865248226, | |
| "grad_norm": 4.335251331329346, | |
| "learning_rate": 1.7257894736842107e-06, | |
| "loss": 0.0581, | |
| "step": 6725 | |
| }, | |
| { | |
| "epoch": 11.96808510638298, | |
| "grad_norm": 4.67876672744751, | |
| "learning_rate": 1.7126315789473686e-06, | |
| "loss": 0.0569, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 12.01241134751773, | |
| "grad_norm": 5.14631462097168, | |
| "learning_rate": 1.6994736842105265e-06, | |
| "loss": 0.0548, | |
| "step": 6775 | |
| }, | |
| { | |
| "epoch": 12.056737588652481, | |
| "grad_norm": 3.651719331741333, | |
| "learning_rate": 1.6863157894736842e-06, | |
| "loss": 0.0514, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 12.101063829787234, | |
| "grad_norm": 3.455418109893799, | |
| "learning_rate": 1.6731578947368423e-06, | |
| "loss": 0.0534, | |
| "step": 6825 | |
| }, | |
| { | |
| "epoch": 12.145390070921986, | |
| "grad_norm": 3.9486734867095947, | |
| "learning_rate": 1.6600000000000002e-06, | |
| "loss": 0.0495, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 12.189716312056738, | |
| "grad_norm": 2.9897313117980957, | |
| "learning_rate": 1.6468421052631581e-06, | |
| "loss": 0.051, | |
| "step": 6875 | |
| }, | |
| { | |
| "epoch": 12.23404255319149, | |
| "grad_norm": 4.208747863769531, | |
| "learning_rate": 1.633684210526316e-06, | |
| "loss": 0.0486, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 12.27836879432624, | |
| "grad_norm": 3.3527841567993164, | |
| "learning_rate": 1.6205263157894737e-06, | |
| "loss": 0.0524, | |
| "step": 6925 | |
| }, | |
| { | |
| "epoch": 12.322695035460994, | |
| "grad_norm": 3.6749916076660156, | |
| "learning_rate": 1.6073684210526316e-06, | |
| "loss": 0.0577, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 12.367021276595745, | |
| "grad_norm": 3.266439437866211, | |
| "learning_rate": 1.5942105263157895e-06, | |
| "loss": 0.0491, | |
| "step": 6975 | |
| }, | |
| { | |
| "epoch": 12.411347517730496, | |
| "grad_norm": 3.9574387073516846, | |
| "learning_rate": 1.5810526315789477e-06, | |
| "loss": 0.0537, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 12.411347517730496, | |
| "eval_loss": 0.5095303654670715, | |
| "eval_runtime": 586.2939, | |
| "eval_samples_per_second": 6.645, | |
| "eval_steps_per_second": 0.831, | |
| "eval_wer": 41.29064425371274, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 12.455673758865249, | |
| "grad_norm": 4.798894882202148, | |
| "learning_rate": 1.5678947368421056e-06, | |
| "loss": 0.0533, | |
| "step": 7025 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "grad_norm": 3.0210700035095215, | |
| "learning_rate": 1.5547368421052633e-06, | |
| "loss": 0.0483, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 12.544326241134751, | |
| "grad_norm": 4.632834434509277, | |
| "learning_rate": 1.5415789473684212e-06, | |
| "loss": 0.0509, | |
| "step": 7075 | |
| }, | |
| { | |
| "epoch": 12.588652482269504, | |
| "grad_norm": 4.397753715515137, | |
| "learning_rate": 1.528421052631579e-06, | |
| "loss": 0.0515, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 12.632978723404255, | |
| "grad_norm": 2.9680283069610596, | |
| "learning_rate": 1.5152631578947368e-06, | |
| "loss": 0.0475, | |
| "step": 7125 | |
| }, | |
| { | |
| "epoch": 12.677304964539006, | |
| "grad_norm": 3.9441206455230713, | |
| "learning_rate": 1.5021052631578947e-06, | |
| "loss": 0.055, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 12.72163120567376, | |
| "grad_norm": 3.183037519454956, | |
| "learning_rate": 1.4889473684210528e-06, | |
| "loss": 0.0522, | |
| "step": 7175 | |
| }, | |
| { | |
| "epoch": 12.76595744680851, | |
| "grad_norm": 3.4659500122070312, | |
| "learning_rate": 1.4757894736842107e-06, | |
| "loss": 0.0477, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 12.810283687943262, | |
| "grad_norm": 3.1689703464508057, | |
| "learning_rate": 1.4626315789473686e-06, | |
| "loss": 0.0512, | |
| "step": 7225 | |
| }, | |
| { | |
| "epoch": 12.854609929078014, | |
| "grad_norm": 5.009653568267822, | |
| "learning_rate": 1.4494736842105263e-06, | |
| "loss": 0.0487, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 12.898936170212766, | |
| "grad_norm": 3.3407084941864014, | |
| "learning_rate": 1.4363157894736842e-06, | |
| "loss": 0.0489, | |
| "step": 7275 | |
| }, | |
| { | |
| "epoch": 12.943262411347519, | |
| "grad_norm": 4.140749454498291, | |
| "learning_rate": 1.4231578947368421e-06, | |
| "loss": 0.047, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 12.98758865248227, | |
| "grad_norm": 3.0766468048095703, | |
| "learning_rate": 1.41e-06, | |
| "loss": 0.052, | |
| "step": 7325 | |
| }, | |
| { | |
| "epoch": 13.03191489361702, | |
| "grad_norm": 3.058790683746338, | |
| "learning_rate": 1.3968421052631582e-06, | |
| "loss": 0.0445, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 13.076241134751774, | |
| "grad_norm": 2.4315567016601562, | |
| "learning_rate": 1.3836842105263159e-06, | |
| "loss": 0.0447, | |
| "step": 7375 | |
| }, | |
| { | |
| "epoch": 13.120567375886525, | |
| "grad_norm": 2.692753314971924, | |
| "learning_rate": 1.3705263157894738e-06, | |
| "loss": 0.0398, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 13.164893617021276, | |
| "grad_norm": 3.2242069244384766, | |
| "learning_rate": 1.3573684210526317e-06, | |
| "loss": 0.0425, | |
| "step": 7425 | |
| }, | |
| { | |
| "epoch": 13.209219858156029, | |
| "grad_norm": 3.640981674194336, | |
| "learning_rate": 1.3442105263157896e-06, | |
| "loss": 0.043, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 13.25354609929078, | |
| "grad_norm": 2.966660261154175, | |
| "learning_rate": 1.3310526315789473e-06, | |
| "loss": 0.0422, | |
| "step": 7475 | |
| }, | |
| { | |
| "epoch": 13.297872340425531, | |
| "grad_norm": 2.7896602153778076, | |
| "learning_rate": 1.3178947368421054e-06, | |
| "loss": 0.044, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 13.342198581560284, | |
| "grad_norm": 3.0664894580841064, | |
| "learning_rate": 1.3047368421052633e-06, | |
| "loss": 0.0432, | |
| "step": 7525 | |
| }, | |
| { | |
| "epoch": 13.386524822695035, | |
| "grad_norm": 2.7736198902130127, | |
| "learning_rate": 1.2915789473684212e-06, | |
| "loss": 0.0416, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 13.430851063829786, | |
| "grad_norm": 3.5016989707946777, | |
| "learning_rate": 1.2784210526315791e-06, | |
| "loss": 0.0481, | |
| "step": 7575 | |
| }, | |
| { | |
| "epoch": 13.47517730496454, | |
| "grad_norm": 3.0631349086761475, | |
| "learning_rate": 1.2652631578947368e-06, | |
| "loss": 0.0441, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 13.51950354609929, | |
| "grad_norm": 3.7912166118621826, | |
| "learning_rate": 1.2521052631578948e-06, | |
| "loss": 0.047, | |
| "step": 7625 | |
| }, | |
| { | |
| "epoch": 13.563829787234042, | |
| "grad_norm": 3.7112090587615967, | |
| "learning_rate": 1.2389473684210527e-06, | |
| "loss": 0.0442, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 13.608156028368795, | |
| "grad_norm": 4.523186206817627, | |
| "learning_rate": 1.2257894736842106e-06, | |
| "loss": 0.0458, | |
| "step": 7675 | |
| }, | |
| { | |
| "epoch": 13.652482269503546, | |
| "grad_norm": 3.0612213611602783, | |
| "learning_rate": 1.2126315789473685e-06, | |
| "loss": 0.0461, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 13.696808510638299, | |
| "grad_norm": 2.902688980102539, | |
| "learning_rate": 1.1994736842105264e-06, | |
| "loss": 0.0446, | |
| "step": 7725 | |
| }, | |
| { | |
| "epoch": 13.74113475177305, | |
| "grad_norm": 2.876624822616577, | |
| "learning_rate": 1.1863157894736843e-06, | |
| "loss": 0.0433, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 13.785460992907801, | |
| "grad_norm": 3.696685314178467, | |
| "learning_rate": 1.1731578947368422e-06, | |
| "loss": 0.0464, | |
| "step": 7775 | |
| }, | |
| { | |
| "epoch": 13.829787234042554, | |
| "grad_norm": 3.2120165824890137, | |
| "learning_rate": 1.1600000000000001e-06, | |
| "loss": 0.0444, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 13.874113475177305, | |
| "grad_norm": 3.735292434692383, | |
| "learning_rate": 1.146842105263158e-06, | |
| "loss": 0.0417, | |
| "step": 7825 | |
| }, | |
| { | |
| "epoch": 13.918439716312056, | |
| "grad_norm": 3.8104641437530518, | |
| "learning_rate": 1.133684210526316e-06, | |
| "loss": 0.045, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 13.962765957446809, | |
| "grad_norm": 3.321183919906616, | |
| "learning_rate": 1.1205263157894736e-06, | |
| "loss": 0.046, | |
| "step": 7875 | |
| }, | |
| { | |
| "epoch": 14.00709219858156, | |
| "grad_norm": 3.0110223293304443, | |
| "learning_rate": 1.1073684210526318e-06, | |
| "loss": 0.0452, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 14.051418439716311, | |
| "grad_norm": 2.797724485397339, | |
| "learning_rate": 1.0942105263157895e-06, | |
| "loss": 0.0384, | |
| "step": 7925 | |
| }, | |
| { | |
| "epoch": 14.095744680851064, | |
| "grad_norm": 2.8559882640838623, | |
| "learning_rate": 1.0810526315789474e-06, | |
| "loss": 0.039, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 14.140070921985815, | |
| "grad_norm": 3.0210611820220947, | |
| "learning_rate": 1.0678947368421055e-06, | |
| "loss": 0.0359, | |
| "step": 7975 | |
| }, | |
| { | |
| "epoch": 14.184397163120567, | |
| "grad_norm": 3.4683313369750977, | |
| "learning_rate": 1.0547368421052632e-06, | |
| "loss": 0.0393, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 14.184397163120567, | |
| "eval_loss": 0.5176098942756653, | |
| "eval_runtime": 588.4879, | |
| "eval_samples_per_second": 6.62, | |
| "eval_steps_per_second": 0.828, | |
| "eval_wer": 41.376117383097686, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 14.22872340425532, | |
| "grad_norm": 3.2651422023773193, | |
| "learning_rate": 1.041578947368421e-06, | |
| "loss": 0.039, | |
| "step": 8025 | |
| }, | |
| { | |
| "epoch": 14.27304964539007, | |
| "grad_norm": 3.2940969467163086, | |
| "learning_rate": 1.028421052631579e-06, | |
| "loss": 0.0393, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 14.317375886524824, | |
| "grad_norm": 3.090914011001587, | |
| "learning_rate": 1.015263157894737e-06, | |
| "loss": 0.039, | |
| "step": 8075 | |
| }, | |
| { | |
| "epoch": 14.361702127659575, | |
| "grad_norm": 3.464435577392578, | |
| "learning_rate": 1.0021052631578948e-06, | |
| "loss": 0.0395, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 14.406028368794326, | |
| "grad_norm": 2.341763734817505, | |
| "learning_rate": 9.889473684210527e-07, | |
| "loss": 0.0392, | |
| "step": 8125 | |
| }, | |
| { | |
| "epoch": 14.450354609929079, | |
| "grad_norm": 2.7853071689605713, | |
| "learning_rate": 9.757894736842106e-07, | |
| "loss": 0.0399, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 14.49468085106383, | |
| "grad_norm": 3.5469071865081787, | |
| "learning_rate": 9.626315789473685e-07, | |
| "loss": 0.0404, | |
| "step": 8175 | |
| }, | |
| { | |
| "epoch": 14.539007092198581, | |
| "grad_norm": 3.5632236003875732, | |
| "learning_rate": 9.494736842105263e-07, | |
| "loss": 0.0409, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 14.583333333333334, | |
| "grad_norm": 2.9529061317443848, | |
| "learning_rate": 9.363157894736844e-07, | |
| "loss": 0.0411, | |
| "step": 8225 | |
| }, | |
| { | |
| "epoch": 14.627659574468085, | |
| "grad_norm": 2.856344223022461, | |
| "learning_rate": 9.231578947368422e-07, | |
| "loss": 0.0384, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 14.671985815602836, | |
| "grad_norm": 3.559720039367676, | |
| "learning_rate": 9.100000000000001e-07, | |
| "loss": 0.039, | |
| "step": 8275 | |
| }, | |
| { | |
| "epoch": 14.71631205673759, | |
| "grad_norm": 3.8412675857543945, | |
| "learning_rate": 8.968421052631579e-07, | |
| "loss": 0.0361, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 14.76063829787234, | |
| "grad_norm": 3.8791191577911377, | |
| "learning_rate": 8.836842105263159e-07, | |
| "loss": 0.0357, | |
| "step": 8325 | |
| }, | |
| { | |
| "epoch": 14.804964539007091, | |
| "grad_norm": 4.187379837036133, | |
| "learning_rate": 8.705263157894737e-07, | |
| "loss": 0.0428, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 14.849290780141844, | |
| "grad_norm": 4.432793617248535, | |
| "learning_rate": 8.573684210526316e-07, | |
| "loss": 0.0375, | |
| "step": 8375 | |
| }, | |
| { | |
| "epoch": 14.893617021276595, | |
| "grad_norm": 3.823516368865967, | |
| "learning_rate": 8.442105263157896e-07, | |
| "loss": 0.041, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 14.937943262411348, | |
| "grad_norm": 3.2699050903320312, | |
| "learning_rate": 8.310526315789474e-07, | |
| "loss": 0.0399, | |
| "step": 8425 | |
| }, | |
| { | |
| "epoch": 14.9822695035461, | |
| "grad_norm": 2.571930408477783, | |
| "learning_rate": 8.178947368421053e-07, | |
| "loss": 0.0405, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 15.02659574468085, | |
| "grad_norm": 2.3387529850006104, | |
| "learning_rate": 8.047368421052632e-07, | |
| "loss": 0.0367, | |
| "step": 8475 | |
| }, | |
| { | |
| "epoch": 15.070921985815604, | |
| "grad_norm": 3.301847219467163, | |
| "learning_rate": 7.915789473684212e-07, | |
| "loss": 0.0347, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 15.115248226950355, | |
| "grad_norm": 3.649311065673828, | |
| "learning_rate": 7.78421052631579e-07, | |
| "loss": 0.0368, | |
| "step": 8525 | |
| }, | |
| { | |
| "epoch": 15.159574468085106, | |
| "grad_norm": 2.7183964252471924, | |
| "learning_rate": 7.652631578947369e-07, | |
| "loss": 0.0357, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 15.203900709219859, | |
| "grad_norm": 4.076670169830322, | |
| "learning_rate": 7.521052631578949e-07, | |
| "loss": 0.0345, | |
| "step": 8575 | |
| }, | |
| { | |
| "epoch": 15.24822695035461, | |
| "grad_norm": 2.5695323944091797, | |
| "learning_rate": 7.389473684210527e-07, | |
| "loss": 0.0332, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 15.292553191489361, | |
| "grad_norm": 3.0496561527252197, | |
| "learning_rate": 7.257894736842106e-07, | |
| "loss": 0.0352, | |
| "step": 8625 | |
| }, | |
| { | |
| "epoch": 15.336879432624114, | |
| "grad_norm": 2.6376793384552, | |
| "learning_rate": 7.126315789473685e-07, | |
| "loss": 0.0381, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 15.381205673758865, | |
| "grad_norm": 2.617739200592041, | |
| "learning_rate": 6.994736842105264e-07, | |
| "loss": 0.0382, | |
| "step": 8675 | |
| }, | |
| { | |
| "epoch": 15.425531914893616, | |
| "grad_norm": 1.8115471601486206, | |
| "learning_rate": 6.863157894736842e-07, | |
| "loss": 0.0358, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 15.46985815602837, | |
| "grad_norm": 2.169344902038574, | |
| "learning_rate": 6.731578947368421e-07, | |
| "loss": 0.0336, | |
| "step": 8725 | |
| }, | |
| { | |
| "epoch": 15.51418439716312, | |
| "grad_norm": 2.6424083709716797, | |
| "learning_rate": 6.6e-07, | |
| "loss": 0.0358, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 15.558510638297872, | |
| "grad_norm": 3.1048264503479004, | |
| "learning_rate": 6.468421052631579e-07, | |
| "loss": 0.0351, | |
| "step": 8775 | |
| }, | |
| { | |
| "epoch": 15.602836879432624, | |
| "grad_norm": 2.3110456466674805, | |
| "learning_rate": 6.336842105263157e-07, | |
| "loss": 0.0338, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 15.647163120567376, | |
| "grad_norm": 3.729184150695801, | |
| "learning_rate": 6.205263157894738e-07, | |
| "loss": 0.0355, | |
| "step": 8825 | |
| }, | |
| { | |
| "epoch": 15.691489361702128, | |
| "grad_norm": 2.4826672077178955, | |
| "learning_rate": 6.073684210526317e-07, | |
| "loss": 0.0314, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 15.73581560283688, | |
| "grad_norm": 2.23388934135437, | |
| "learning_rate": 5.942105263157895e-07, | |
| "loss": 0.0369, | |
| "step": 8875 | |
| }, | |
| { | |
| "epoch": 15.78014184397163, | |
| "grad_norm": 2.6081252098083496, | |
| "learning_rate": 5.810526315789474e-07, | |
| "loss": 0.0335, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 15.824468085106384, | |
| "grad_norm": 2.6870105266571045, | |
| "learning_rate": 5.678947368421053e-07, | |
| "loss": 0.0355, | |
| "step": 8925 | |
| }, | |
| { | |
| "epoch": 15.868794326241135, | |
| "grad_norm": 2.663280725479126, | |
| "learning_rate": 5.547368421052632e-07, | |
| "loss": 0.0386, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 15.913120567375886, | |
| "grad_norm": 2.3229103088378906, | |
| "learning_rate": 5.415789473684211e-07, | |
| "loss": 0.0343, | |
| "step": 8975 | |
| }, | |
| { | |
| "epoch": 15.957446808510639, | |
| "grad_norm": 2.0813558101654053, | |
| "learning_rate": 5.284210526315789e-07, | |
| "loss": 0.0354, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 15.957446808510639, | |
| "eval_loss": 0.525884747505188, | |
| "eval_runtime": 589.6497, | |
| "eval_samples_per_second": 6.607, | |
| "eval_steps_per_second": 0.826, | |
| "eval_wer": 42.95380889632822, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 16.00177304964539, | |
| "grad_norm": 2.0741195678710938, | |
| "learning_rate": 5.152631578947369e-07, | |
| "loss": 0.0357, | |
| "step": 9025 | |
| }, | |
| { | |
| "epoch": 16.04609929078014, | |
| "grad_norm": 2.2553582191467285, | |
| "learning_rate": 5.021052631578948e-07, | |
| "loss": 0.0348, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 16.090425531914892, | |
| "grad_norm": 2.8926563262939453, | |
| "learning_rate": 4.889473684210526e-07, | |
| "loss": 0.0329, | |
| "step": 9075 | |
| }, | |
| { | |
| "epoch": 16.134751773049647, | |
| "grad_norm": 2.0093374252319336, | |
| "learning_rate": 4.757894736842106e-07, | |
| "loss": 0.0316, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 16.179078014184398, | |
| "grad_norm": 2.607196569442749, | |
| "learning_rate": 4.626315789473684e-07, | |
| "loss": 0.0343, | |
| "step": 9125 | |
| }, | |
| { | |
| "epoch": 16.22340425531915, | |
| "grad_norm": 3.0515787601470947, | |
| "learning_rate": 4.4947368421052637e-07, | |
| "loss": 0.0337, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 16.2677304964539, | |
| "grad_norm": 2.6530652046203613, | |
| "learning_rate": 4.363157894736843e-07, | |
| "loss": 0.0328, | |
| "step": 9175 | |
| }, | |
| { | |
| "epoch": 16.31205673758865, | |
| "grad_norm": 2.286144733428955, | |
| "learning_rate": 4.2315789473684214e-07, | |
| "loss": 0.0314, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 16.356382978723403, | |
| "grad_norm": 1.9126091003417969, | |
| "learning_rate": 4.1000000000000004e-07, | |
| "loss": 0.0337, | |
| "step": 9225 | |
| }, | |
| { | |
| "epoch": 16.400709219858157, | |
| "grad_norm": 2.4638893604278564, | |
| "learning_rate": 3.968421052631579e-07, | |
| "loss": 0.0326, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 16.44503546099291, | |
| "grad_norm": 1.9740127325057983, | |
| "learning_rate": 3.836842105263158e-07, | |
| "loss": 0.0316, | |
| "step": 9275 | |
| }, | |
| { | |
| "epoch": 16.48936170212766, | |
| "grad_norm": 2.466771125793457, | |
| "learning_rate": 3.7052631578947377e-07, | |
| "loss": 0.0323, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 16.53368794326241, | |
| "grad_norm": 3.461355686187744, | |
| "learning_rate": 3.573684210526316e-07, | |
| "loss": 0.0329, | |
| "step": 9325 | |
| }, | |
| { | |
| "epoch": 16.578014184397162, | |
| "grad_norm": 4.0049662590026855, | |
| "learning_rate": 3.4421052631578954e-07, | |
| "loss": 0.0307, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 16.622340425531917, | |
| "grad_norm": 3.518848419189453, | |
| "learning_rate": 3.310526315789474e-07, | |
| "loss": 0.0303, | |
| "step": 9375 | |
| }, | |
| { | |
| "epoch": 16.666666666666668, | |
| "grad_norm": 2.83296799659729, | |
| "learning_rate": 3.178947368421053e-07, | |
| "loss": 0.0357, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 16.71099290780142, | |
| "grad_norm": 2.5147087574005127, | |
| "learning_rate": 3.0473684210526316e-07, | |
| "loss": 0.0328, | |
| "step": 9425 | |
| }, | |
| { | |
| "epoch": 16.75531914893617, | |
| "grad_norm": 3.1541314125061035, | |
| "learning_rate": 2.9157894736842107e-07, | |
| "loss": 0.0344, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 16.79964539007092, | |
| "grad_norm": 2.6284327507019043, | |
| "learning_rate": 2.78421052631579e-07, | |
| "loss": 0.0317, | |
| "step": 9475 | |
| }, | |
| { | |
| "epoch": 16.843971631205672, | |
| "grad_norm": 2.6868457794189453, | |
| "learning_rate": 2.6526315789473684e-07, | |
| "loss": 0.0336, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 16.888297872340427, | |
| "grad_norm": 1.9752613306045532, | |
| "learning_rate": 2.5210526315789474e-07, | |
| "loss": 0.0321, | |
| "step": 9525 | |
| }, | |
| { | |
| "epoch": 16.93262411347518, | |
| "grad_norm": 2.544431447982788, | |
| "learning_rate": 2.3894736842105265e-07, | |
| "loss": 0.0334, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 16.97695035460993, | |
| "grad_norm": 3.398198366165161, | |
| "learning_rate": 2.2578947368421054e-07, | |
| "loss": 0.0342, | |
| "step": 9575 | |
| }, | |
| { | |
| "epoch": 17.02127659574468, | |
| "grad_norm": 2.5984408855438232, | |
| "learning_rate": 2.1263157894736842e-07, | |
| "loss": 0.0341, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 17.06560283687943, | |
| "grad_norm": 2.4616684913635254, | |
| "learning_rate": 1.9947368421052633e-07, | |
| "loss": 0.0297, | |
| "step": 9625 | |
| }, | |
| { | |
| "epoch": 17.109929078014183, | |
| "grad_norm": 1.7951653003692627, | |
| "learning_rate": 1.8631578947368424e-07, | |
| "loss": 0.0315, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 17.154255319148938, | |
| "grad_norm": 2.757528066635132, | |
| "learning_rate": 1.7315789473684212e-07, | |
| "loss": 0.0315, | |
| "step": 9675 | |
| }, | |
| { | |
| "epoch": 17.19858156028369, | |
| "grad_norm": 2.636103868484497, | |
| "learning_rate": 1.6e-07, | |
| "loss": 0.034, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 17.24290780141844, | |
| "grad_norm": 2.6068308353424072, | |
| "learning_rate": 1.468421052631579e-07, | |
| "loss": 0.0318, | |
| "step": 9725 | |
| }, | |
| { | |
| "epoch": 17.28723404255319, | |
| "grad_norm": 2.8780813217163086, | |
| "learning_rate": 1.3368421052631582e-07, | |
| "loss": 0.0291, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 17.331560283687942, | |
| "grad_norm": 2.056938409805298, | |
| "learning_rate": 1.205263157894737e-07, | |
| "loss": 0.0312, | |
| "step": 9775 | |
| }, | |
| { | |
| "epoch": 17.375886524822697, | |
| "grad_norm": 1.919609785079956, | |
| "learning_rate": 1.0736842105263159e-07, | |
| "loss": 0.0301, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 17.420212765957448, | |
| "grad_norm": 2.8991692066192627, | |
| "learning_rate": 9.421052631578948e-08, | |
| "loss": 0.0325, | |
| "step": 9825 | |
| }, | |
| { | |
| "epoch": 17.4645390070922, | |
| "grad_norm": 3.554067611694336, | |
| "learning_rate": 8.105263157894738e-08, | |
| "loss": 0.0325, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 17.50886524822695, | |
| "grad_norm": 2.3162522315979004, | |
| "learning_rate": 6.789473684210528e-08, | |
| "loss": 0.0295, | |
| "step": 9875 | |
| }, | |
| { | |
| "epoch": 17.5531914893617, | |
| "grad_norm": 1.9810761213302612, | |
| "learning_rate": 5.473684210526316e-08, | |
| "loss": 0.0316, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 17.597517730496453, | |
| "grad_norm": 2.424508571624756, | |
| "learning_rate": 4.1578947368421054e-08, | |
| "loss": 0.0299, | |
| "step": 9925 | |
| }, | |
| { | |
| "epoch": 17.641843971631207, | |
| "grad_norm": 3.3512706756591797, | |
| "learning_rate": 2.842105263157895e-08, | |
| "loss": 0.0316, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 17.68617021276596, | |
| "grad_norm": 2.7042930126190186, | |
| "learning_rate": 1.5263157894736843e-08, | |
| "loss": 0.0308, | |
| "step": 9975 | |
| }, | |
| { | |
| "epoch": 17.73049645390071, | |
| "grad_norm": 2.975961685180664, | |
| "learning_rate": 2.105263157894737e-09, | |
| "loss": 0.0317, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 17.73049645390071, | |
| "eval_loss": 0.5293700695037842, | |
| "eval_runtime": 590.1218, | |
| "eval_samples_per_second": 6.602, | |
| "eval_steps_per_second": 0.825, | |
| "eval_wer": 44.56711421346914, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 17.73049645390071, | |
| "step": 10000, | |
| "total_flos": 3.93818457710592e+18, | |
| "train_loss": 0.1984061565220356, | |
| "train_runtime": 10260.2689, | |
| "train_samples_per_second": 15.594, | |
| "train_steps_per_second": 0.975 | |
| }, | |
| { | |
| "epoch": 17.73049645390071, | |
| "eval_loss": 0.4848020076751709, | |
| "eval_runtime": 614.3795, | |
| "eval_samples_per_second": 6.341, | |
| "eval_steps_per_second": 0.793, | |
| "eval_wer": 37.98212187043698, | |
| "step": 10000 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 10000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 18, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.93818457710592e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |