| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 513, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.005847953216374269, |
| "grad_norm": 17.388755230326108, |
| "learning_rate": 3.846153846153847e-07, |
| "loss": 2.1955, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.011695906432748537, |
| "grad_norm": 17.472331789941457, |
| "learning_rate": 7.692307692307694e-07, |
| "loss": 2.3172, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.017543859649122806, |
| "grad_norm": 16.918399664716798, |
| "learning_rate": 1.153846153846154e-06, |
| "loss": 2.369, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.023391812865497075, |
| "grad_norm": 18.225976223813337, |
| "learning_rate": 1.5384615384615387e-06, |
| "loss": 2.3503, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.029239766081871343, |
| "grad_norm": 17.290350784392576, |
| "learning_rate": 1.9230769230769234e-06, |
| "loss": 2.206, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.03508771929824561, |
| "grad_norm": 17.713013786969835, |
| "learning_rate": 2.307692307692308e-06, |
| "loss": 2.1757, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.04093567251461988, |
| "grad_norm": 16.957525841766792, |
| "learning_rate": 2.6923076923076923e-06, |
| "loss": 2.263, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.04678362573099415, |
| "grad_norm": 15.274517120371355, |
| "learning_rate": 3.0769230769230774e-06, |
| "loss": 2.0823, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.05263157894736842, |
| "grad_norm": 16.17819052550626, |
| "learning_rate": 3.4615384615384617e-06, |
| "loss": 2.1592, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.05847953216374269, |
| "grad_norm": 14.878611384619472, |
| "learning_rate": 3.846153846153847e-06, |
| "loss": 2.0778, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.06432748538011696, |
| "grad_norm": 11.028350456358304, |
| "learning_rate": 4.230769230769231e-06, |
| "loss": 1.7525, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.07017543859649122, |
| "grad_norm": 10.604424239205104, |
| "learning_rate": 4.615384615384616e-06, |
| "loss": 1.8686, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.07602339181286549, |
| "grad_norm": 9.83197777453571, |
| "learning_rate": 5e-06, |
| "loss": 1.6995, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.08187134502923976, |
| "grad_norm": 9.28612318800235, |
| "learning_rate": 5.384615384615385e-06, |
| "loss": 1.7511, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.08771929824561403, |
| "grad_norm": 3.6938751947460333, |
| "learning_rate": 5.769230769230769e-06, |
| "loss": 1.4354, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0935672514619883, |
| "grad_norm": 3.64251741494419, |
| "learning_rate": 6.153846153846155e-06, |
| "loss": 1.4634, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.09941520467836257, |
| "grad_norm": 3.249845410537068, |
| "learning_rate": 6.538461538461539e-06, |
| "loss": 1.4062, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.10526315789473684, |
| "grad_norm": 3.0197933728284476, |
| "learning_rate": 6.923076923076923e-06, |
| "loss": 1.4268, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.1111111111111111, |
| "grad_norm": 2.5032405922437087, |
| "learning_rate": 7.307692307692308e-06, |
| "loss": 1.3546, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.11695906432748537, |
| "grad_norm": 1.6914458221673982, |
| "learning_rate": 7.692307692307694e-06, |
| "loss": 1.2072, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.12280701754385964, |
| "grad_norm": 1.633209041430983, |
| "learning_rate": 8.076923076923077e-06, |
| "loss": 1.1706, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.1286549707602339, |
| "grad_norm": 1.62333800604462, |
| "learning_rate": 8.461538461538462e-06, |
| "loss": 1.2573, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.13450292397660818, |
| "grad_norm": 1.2572597783261759, |
| "learning_rate": 8.846153846153847e-06, |
| "loss": 1.1549, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.14035087719298245, |
| "grad_norm": 1.0892793835477907, |
| "learning_rate": 9.230769230769232e-06, |
| "loss": 1.1367, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.14619883040935672, |
| "grad_norm": 0.9726760103698124, |
| "learning_rate": 9.615384615384616e-06, |
| "loss": 1.1664, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.15204678362573099, |
| "grad_norm": 0.8399835297943901, |
| "learning_rate": 1e-05, |
| "loss": 1.0771, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.15789473684210525, |
| "grad_norm": 0.756344388475637, |
| "learning_rate": 1.0384615384615386e-05, |
| "loss": 1.0361, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.16374269005847952, |
| "grad_norm": 0.6916203141074345, |
| "learning_rate": 1.076923076923077e-05, |
| "loss": 1.0276, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.1695906432748538, |
| "grad_norm": 0.6795075377629257, |
| "learning_rate": 1.1153846153846154e-05, |
| "loss": 1.0305, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.17543859649122806, |
| "grad_norm": 0.7397958603300506, |
| "learning_rate": 1.1538461538461538e-05, |
| "loss": 1.036, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.18128654970760233, |
| "grad_norm": 0.5914063886870811, |
| "learning_rate": 1.1923076923076925e-05, |
| "loss": 1.0643, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.1871345029239766, |
| "grad_norm": 0.558807526586334, |
| "learning_rate": 1.230769230769231e-05, |
| "loss": 0.9457, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.19298245614035087, |
| "grad_norm": 0.4962345963320037, |
| "learning_rate": 1.2692307692307693e-05, |
| "loss": 0.9556, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.19883040935672514, |
| "grad_norm": 0.5368004540999115, |
| "learning_rate": 1.3076923076923078e-05, |
| "loss": 1.0031, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.2046783625730994, |
| "grad_norm": 0.5193693046254093, |
| "learning_rate": 1.3461538461538463e-05, |
| "loss": 0.937, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.21052631578947367, |
| "grad_norm": 0.42294351955291465, |
| "learning_rate": 1.3846153846153847e-05, |
| "loss": 0.8972, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.21637426900584794, |
| "grad_norm": 0.39791430214156615, |
| "learning_rate": 1.4230769230769232e-05, |
| "loss": 0.9484, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 0.42681451896746464, |
| "learning_rate": 1.4615384615384615e-05, |
| "loss": 0.942, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.22807017543859648, |
| "grad_norm": 0.39243989614880825, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 0.9379, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.23391812865497075, |
| "grad_norm": 0.4195184915021303, |
| "learning_rate": 1.5384615384615387e-05, |
| "loss": 0.9327, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.23976608187134502, |
| "grad_norm": 0.3544937192321327, |
| "learning_rate": 1.576923076923077e-05, |
| "loss": 0.851, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.24561403508771928, |
| "grad_norm": 0.3416373732580841, |
| "learning_rate": 1.6153846153846154e-05, |
| "loss": 0.8644, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.25146198830409355, |
| "grad_norm": 0.4128427286910145, |
| "learning_rate": 1.653846153846154e-05, |
| "loss": 0.9002, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.2573099415204678, |
| "grad_norm": 0.4386903858466522, |
| "learning_rate": 1.6923076923076924e-05, |
| "loss": 0.8995, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.2631578947368421, |
| "grad_norm": 0.3894766430305266, |
| "learning_rate": 1.730769230769231e-05, |
| "loss": 0.8796, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.26900584795321636, |
| "grad_norm": 0.33237410703928805, |
| "learning_rate": 1.7692307692307694e-05, |
| "loss": 0.887, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.27485380116959063, |
| "grad_norm": 0.3287665841977238, |
| "learning_rate": 1.807692307692308e-05, |
| "loss": 0.8444, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.2807017543859649, |
| "grad_norm": 0.3109160417844228, |
| "learning_rate": 1.8461538461538465e-05, |
| "loss": 0.8708, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.28654970760233917, |
| "grad_norm": 0.30795401416756046, |
| "learning_rate": 1.8846153846153846e-05, |
| "loss": 0.8434, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.29239766081871343, |
| "grad_norm": 0.3549208935855604, |
| "learning_rate": 1.923076923076923e-05, |
| "loss": 0.8526, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2982456140350877, |
| "grad_norm": 0.2755256325053317, |
| "learning_rate": 1.9615384615384617e-05, |
| "loss": 0.7736, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.30409356725146197, |
| "grad_norm": 0.43817461634852256, |
| "learning_rate": 2e-05, |
| "loss": 0.8534, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.30994152046783624, |
| "grad_norm": 0.3377814673600554, |
| "learning_rate": 1.995661605206074e-05, |
| "loss": 0.8019, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.3157894736842105, |
| "grad_norm": 0.34154032226288855, |
| "learning_rate": 1.9913232104121476e-05, |
| "loss": 0.8458, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.3216374269005848, |
| "grad_norm": 0.33647765891218867, |
| "learning_rate": 1.9869848156182215e-05, |
| "loss": 0.8527, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.32748538011695905, |
| "grad_norm": 0.2933887985818341, |
| "learning_rate": 1.9826464208242954e-05, |
| "loss": 0.8182, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.28265837293209917, |
| "learning_rate": 1.978308026030369e-05, |
| "loss": 0.8355, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.3391812865497076, |
| "grad_norm": 0.2632405163872988, |
| "learning_rate": 1.973969631236443e-05, |
| "loss": 0.7543, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.34502923976608185, |
| "grad_norm": 0.38062672853122476, |
| "learning_rate": 1.9696312364425164e-05, |
| "loss": 0.8183, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.3508771929824561, |
| "grad_norm": 0.26245608696685946, |
| "learning_rate": 1.96529284164859e-05, |
| "loss": 0.8004, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3567251461988304, |
| "grad_norm": 0.32799401692804, |
| "learning_rate": 1.960954446854664e-05, |
| "loss": 0.8405, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.36257309941520466, |
| "grad_norm": 0.29066553024128605, |
| "learning_rate": 1.9566160520607378e-05, |
| "loss": 0.8492, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.3684210526315789, |
| "grad_norm": 0.28501467209616904, |
| "learning_rate": 1.9522776572668113e-05, |
| "loss": 0.8207, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.3742690058479532, |
| "grad_norm": 0.2525036458551552, |
| "learning_rate": 1.9479392624728852e-05, |
| "loss": 0.779, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.38011695906432746, |
| "grad_norm": 0.2920718950928194, |
| "learning_rate": 1.9436008676789588e-05, |
| "loss": 0.7937, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.38596491228070173, |
| "grad_norm": 0.27183550316859734, |
| "learning_rate": 1.9392624728850327e-05, |
| "loss": 0.8344, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.391812865497076, |
| "grad_norm": 0.272325024687968, |
| "learning_rate": 1.9349240780911066e-05, |
| "loss": 0.7577, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.39766081871345027, |
| "grad_norm": 0.2761663772793096, |
| "learning_rate": 1.93058568329718e-05, |
| "loss": 0.8253, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.40350877192982454, |
| "grad_norm": 0.3577604398976665, |
| "learning_rate": 1.926247288503254e-05, |
| "loss": 0.871, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.4093567251461988, |
| "grad_norm": 0.3054954243342987, |
| "learning_rate": 1.921908893709328e-05, |
| "loss": 0.8485, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.4152046783625731, |
| "grad_norm": 0.2295446772431491, |
| "learning_rate": 1.9175704989154015e-05, |
| "loss": 0.775, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.42105263157894735, |
| "grad_norm": 0.27441930221043814, |
| "learning_rate": 1.9132321041214754e-05, |
| "loss": 0.7984, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.4269005847953216, |
| "grad_norm": 0.25560502683198316, |
| "learning_rate": 1.908893709327549e-05, |
| "loss": 0.8089, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.4327485380116959, |
| "grad_norm": 0.27391446302846595, |
| "learning_rate": 1.9045553145336228e-05, |
| "loss": 0.8194, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.43859649122807015, |
| "grad_norm": 0.25049008602661516, |
| "learning_rate": 1.9002169197396964e-05, |
| "loss": 0.7685, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 0.32703190034733925, |
| "learning_rate": 1.8958785249457703e-05, |
| "loss": 0.8045, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.4502923976608187, |
| "grad_norm": 0.2461722936867296, |
| "learning_rate": 1.8915401301518438e-05, |
| "loss": 0.7747, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.45614035087719296, |
| "grad_norm": 0.3049860315464052, |
| "learning_rate": 1.8872017353579177e-05, |
| "loss": 0.8265, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.4619883040935672, |
| "grad_norm": 0.2769624138638705, |
| "learning_rate": 1.8828633405639916e-05, |
| "loss": 0.8186, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.4678362573099415, |
| "grad_norm": 0.22632052204690653, |
| "learning_rate": 1.878524945770065e-05, |
| "loss": 0.7426, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.47368421052631576, |
| "grad_norm": 0.2538308819987603, |
| "learning_rate": 1.874186550976139e-05, |
| "loss": 0.7849, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.47953216374269003, |
| "grad_norm": 0.3146181235378422, |
| "learning_rate": 1.869848156182213e-05, |
| "loss": 0.8087, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.4853801169590643, |
| "grad_norm": 0.22831617588223724, |
| "learning_rate": 1.8655097613882865e-05, |
| "loss": 0.7431, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.49122807017543857, |
| "grad_norm": 0.24832072861713958, |
| "learning_rate": 1.8611713665943604e-05, |
| "loss": 0.7807, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.49707602339181284, |
| "grad_norm": 0.28945761508471823, |
| "learning_rate": 1.856832971800434e-05, |
| "loss": 0.8025, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.5029239766081871, |
| "grad_norm": 0.24882286573309492, |
| "learning_rate": 1.852494577006508e-05, |
| "loss": 0.8041, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.5087719298245614, |
| "grad_norm": 0.2569507918826724, |
| "learning_rate": 1.8481561822125814e-05, |
| "loss": 0.8097, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.5146198830409356, |
| "grad_norm": 0.2660930480772777, |
| "learning_rate": 1.8438177874186553e-05, |
| "loss": 0.7199, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.52046783625731, |
| "grad_norm": 0.26945118834678633, |
| "learning_rate": 1.839479392624729e-05, |
| "loss": 0.8035, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.5263157894736842, |
| "grad_norm": 0.2748667946921001, |
| "learning_rate": 1.8351409978308028e-05, |
| "loss": 0.8062, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.5321637426900585, |
| "grad_norm": 0.2363367636075127, |
| "learning_rate": 1.8308026030368763e-05, |
| "loss": 0.7497, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.5380116959064327, |
| "grad_norm": 0.2194408996520716, |
| "learning_rate": 1.8264642082429502e-05, |
| "loss": 0.7582, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.543859649122807, |
| "grad_norm": 0.2479217006944137, |
| "learning_rate": 1.822125813449024e-05, |
| "loss": 0.7816, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.5497076023391813, |
| "grad_norm": 0.24365954457591307, |
| "learning_rate": 1.8177874186550977e-05, |
| "loss": 0.7951, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 0.2480572301895391, |
| "learning_rate": 1.8134490238611715e-05, |
| "loss": 0.7808, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.5614035087719298, |
| "grad_norm": 0.24464048645651124, |
| "learning_rate": 1.8091106290672454e-05, |
| "loss": 0.7153, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.5672514619883041, |
| "grad_norm": 0.23776979402481216, |
| "learning_rate": 1.804772234273319e-05, |
| "loss": 0.7168, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.5730994152046783, |
| "grad_norm": 0.2779826898090206, |
| "learning_rate": 1.800433839479393e-05, |
| "loss": 0.784, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.5789473684210527, |
| "grad_norm": 0.2625471662464305, |
| "learning_rate": 1.7960954446854664e-05, |
| "loss": 0.7575, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.5847953216374269, |
| "grad_norm": 0.24973722791738373, |
| "learning_rate": 1.7917570498915403e-05, |
| "loss": 0.7604, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5906432748538012, |
| "grad_norm": 0.24882129597326091, |
| "learning_rate": 1.787418655097614e-05, |
| "loss": 0.7571, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.5964912280701754, |
| "grad_norm": 0.2490465646513338, |
| "learning_rate": 1.7830802603036878e-05, |
| "loss": 0.7728, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.6023391812865497, |
| "grad_norm": 0.2803127473261744, |
| "learning_rate": 1.7787418655097614e-05, |
| "loss": 0.7486, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.6081871345029239, |
| "grad_norm": 0.30875931205277196, |
| "learning_rate": 1.7744034707158352e-05, |
| "loss": 0.7747, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.6140350877192983, |
| "grad_norm": 0.249801739956383, |
| "learning_rate": 1.770065075921909e-05, |
| "loss": 0.7719, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.6198830409356725, |
| "grad_norm": 0.2493900745685089, |
| "learning_rate": 1.7657266811279827e-05, |
| "loss": 0.7517, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.6257309941520468, |
| "grad_norm": 0.2217608176730444, |
| "learning_rate": 1.7613882863340566e-05, |
| "loss": 0.7385, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.631578947368421, |
| "grad_norm": 0.23151529808146598, |
| "learning_rate": 1.7570498915401305e-05, |
| "loss": 0.7092, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.6374269005847953, |
| "grad_norm": 0.2648606357036367, |
| "learning_rate": 1.752711496746204e-05, |
| "loss": 0.7748, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.6432748538011696, |
| "grad_norm": 0.22637593754873542, |
| "learning_rate": 1.748373101952278e-05, |
| "loss": 0.7594, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.6491228070175439, |
| "grad_norm": 0.24569329004133555, |
| "learning_rate": 1.7440347071583515e-05, |
| "loss": 0.7662, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.6549707602339181, |
| "grad_norm": 0.23086082605618571, |
| "learning_rate": 1.7396963123644254e-05, |
| "loss": 0.7291, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.6608187134502924, |
| "grad_norm": 0.23164513757355204, |
| "learning_rate": 1.735357917570499e-05, |
| "loss": 0.761, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.2341951309434963, |
| "learning_rate": 1.731019522776573e-05, |
| "loss": 0.7707, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.672514619883041, |
| "grad_norm": 0.2294815579241083, |
| "learning_rate": 1.7266811279826464e-05, |
| "loss": 0.7307, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.6783625730994152, |
| "grad_norm": 0.2425767445634441, |
| "learning_rate": 1.7223427331887203e-05, |
| "loss": 0.7573, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.6842105263157895, |
| "grad_norm": 0.22967591410278537, |
| "learning_rate": 1.718004338394794e-05, |
| "loss": 0.7513, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.6900584795321637, |
| "grad_norm": 0.26903092877754314, |
| "learning_rate": 1.7136659436008677e-05, |
| "loss": 0.7858, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.695906432748538, |
| "grad_norm": 0.2571480378610959, |
| "learning_rate": 1.7093275488069416e-05, |
| "loss": 0.7736, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.7017543859649122, |
| "grad_norm": 0.23273043019862788, |
| "learning_rate": 1.7049891540130152e-05, |
| "loss": 0.7669, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.7076023391812866, |
| "grad_norm": 0.23314091686361454, |
| "learning_rate": 1.700650759219089e-05, |
| "loss": 0.7699, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.7134502923976608, |
| "grad_norm": 0.26268224212689045, |
| "learning_rate": 1.696312364425163e-05, |
| "loss": 0.7832, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.7192982456140351, |
| "grad_norm": 0.26423904380170976, |
| "learning_rate": 1.6919739696312365e-05, |
| "loss": 0.7595, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.7251461988304093, |
| "grad_norm": 0.21495414583172803, |
| "learning_rate": 1.6876355748373104e-05, |
| "loss": 0.7106, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.7309941520467836, |
| "grad_norm": 0.2111254963997244, |
| "learning_rate": 1.6832971800433843e-05, |
| "loss": 0.7455, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.7368421052631579, |
| "grad_norm": 0.2156942153910527, |
| "learning_rate": 1.678958785249458e-05, |
| "loss": 0.69, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.7426900584795322, |
| "grad_norm": 0.20057578031019538, |
| "learning_rate": 1.6746203904555314e-05, |
| "loss": 0.7253, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.7485380116959064, |
| "grad_norm": 0.258323958272931, |
| "learning_rate": 1.6702819956616053e-05, |
| "loss": 0.7156, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.7543859649122807, |
| "grad_norm": 0.23301112011268071, |
| "learning_rate": 1.665943600867679e-05, |
| "loss": 0.7562, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.7602339181286549, |
| "grad_norm": 0.27354281471105707, |
| "learning_rate": 1.6616052060737528e-05, |
| "loss": 0.7494, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.7660818713450293, |
| "grad_norm": 0.25737706341844985, |
| "learning_rate": 1.6572668112798267e-05, |
| "loss": 0.7471, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.7719298245614035, |
| "grad_norm": 0.2112391813708006, |
| "learning_rate": 1.6529284164859002e-05, |
| "loss": 0.7296, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.7777777777777778, |
| "grad_norm": 0.2066541279425585, |
| "learning_rate": 1.648590021691974e-05, |
| "loss": 0.7427, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.783625730994152, |
| "grad_norm": 0.21492978047244818, |
| "learning_rate": 1.644251626898048e-05, |
| "loss": 0.6956, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.7894736842105263, |
| "grad_norm": 0.22539724372329056, |
| "learning_rate": 1.6399132321041216e-05, |
| "loss": 0.7358, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.7953216374269005, |
| "grad_norm": 0.223824231061946, |
| "learning_rate": 1.6355748373101955e-05, |
| "loss": 0.747, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.8011695906432749, |
| "grad_norm": 0.22433634692844312, |
| "learning_rate": 1.631236442516269e-05, |
| "loss": 0.7478, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.8070175438596491, |
| "grad_norm": 0.2355525364186235, |
| "learning_rate": 1.626898047722343e-05, |
| "loss": 0.7539, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.8128654970760234, |
| "grad_norm": 0.22774103617994296, |
| "learning_rate": 1.6225596529284168e-05, |
| "loss": 0.7227, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.8187134502923976, |
| "grad_norm": 0.24837995707152566, |
| "learning_rate": 1.6182212581344904e-05, |
| "loss": 0.7048, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.8245614035087719, |
| "grad_norm": 0.2165941656087455, |
| "learning_rate": 1.613882863340564e-05, |
| "loss": 0.7095, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.8304093567251462, |
| "grad_norm": 0.24496476577766357, |
| "learning_rate": 1.609544468546638e-05, |
| "loss": 0.731, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.8362573099415205, |
| "grad_norm": 0.2275760050109454, |
| "learning_rate": 1.6052060737527114e-05, |
| "loss": 0.7159, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.8421052631578947, |
| "grad_norm": 0.203518916790755, |
| "learning_rate": 1.6008676789587853e-05, |
| "loss": 0.6462, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.847953216374269, |
| "grad_norm": 0.24268384602078139, |
| "learning_rate": 1.5965292841648592e-05, |
| "loss": 0.7206, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.8538011695906432, |
| "grad_norm": 0.2911481588164572, |
| "learning_rate": 1.5921908893709327e-05, |
| "loss": 0.766, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.8596491228070176, |
| "grad_norm": 0.25277324694147335, |
| "learning_rate": 1.5878524945770066e-05, |
| "loss": 0.7501, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.8654970760233918, |
| "grad_norm": 0.2372457450088363, |
| "learning_rate": 1.5835140997830805e-05, |
| "loss": 0.712, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.8713450292397661, |
| "grad_norm": 0.19877008506291952, |
| "learning_rate": 1.579175704989154e-05, |
| "loss": 0.7146, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.8771929824561403, |
| "grad_norm": 0.27732708769815756, |
| "learning_rate": 1.574837310195228e-05, |
| "loss": 0.7347, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.8830409356725146, |
| "grad_norm": 0.20303134209612006, |
| "learning_rate": 1.570498915401302e-05, |
| "loss": 0.71, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 0.23703380426454326, |
| "learning_rate": 1.5661605206073754e-05, |
| "loss": 0.6915, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.8947368421052632, |
| "grad_norm": 0.23526601753982804, |
| "learning_rate": 1.5618221258134493e-05, |
| "loss": 0.72, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.9005847953216374, |
| "grad_norm": 0.2408627140057496, |
| "learning_rate": 1.557483731019523e-05, |
| "loss": 0.7206, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.9064327485380117, |
| "grad_norm": 0.22070261442759123, |
| "learning_rate": 1.5531453362255964e-05, |
| "loss": 0.7019, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.9122807017543859, |
| "grad_norm": 0.236776997470983, |
| "learning_rate": 1.5488069414316703e-05, |
| "loss": 0.7314, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.9181286549707602, |
| "grad_norm": 0.25431877096559957, |
| "learning_rate": 1.5444685466377442e-05, |
| "loss": 0.7663, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.9239766081871345, |
| "grad_norm": 0.2934790109300597, |
| "learning_rate": 1.5401301518438178e-05, |
| "loss": 0.7388, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.9298245614035088, |
| "grad_norm": 0.2287254855752223, |
| "learning_rate": 1.5357917570498917e-05, |
| "loss": 0.703, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.935672514619883, |
| "grad_norm": 0.21116594695108679, |
| "learning_rate": 1.5314533622559656e-05, |
| "loss": 0.7228, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.9415204678362573, |
| "grad_norm": 0.25825565901072856, |
| "learning_rate": 1.527114967462039e-05, |
| "loss": 0.7791, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.9473684210526315, |
| "grad_norm": 0.23103746722781796, |
| "learning_rate": 1.522776572668113e-05, |
| "loss": 0.6951, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.9532163742690059, |
| "grad_norm": 0.2580198439201409, |
| "learning_rate": 1.5184381778741866e-05, |
| "loss": 0.741, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.9590643274853801, |
| "grad_norm": 0.2974306573786225, |
| "learning_rate": 1.5140997830802605e-05, |
| "loss": 0.725, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.9649122807017544, |
| "grad_norm": 0.26570078456731205, |
| "learning_rate": 1.5097613882863342e-05, |
| "loss": 0.7467, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.9707602339181286, |
| "grad_norm": 0.4533476269871839, |
| "learning_rate": 1.5054229934924078e-05, |
| "loss": 0.6971, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.9766081871345029, |
| "grad_norm": 0.23895703831919585, |
| "learning_rate": 1.5010845986984816e-05, |
| "loss": 0.7341, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.9824561403508771, |
| "grad_norm": 0.20339364928582415, |
| "learning_rate": 1.4967462039045555e-05, |
| "loss": 0.6757, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.9883040935672515, |
| "grad_norm": 0.23049708494261534, |
| "learning_rate": 1.4924078091106291e-05, |
| "loss": 0.7236, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.9941520467836257, |
| "grad_norm": 0.2026976413223512, |
| "learning_rate": 1.488069414316703e-05, |
| "loss": 0.7006, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.22355532686958146, |
| "learning_rate": 1.4837310195227767e-05, |
| "loss": 0.701, |
| "step": 171 |
| }, |
| { |
| "epoch": 1.0058479532163742, |
| "grad_norm": 0.20282760221622007, |
| "learning_rate": 1.4793926247288504e-05, |
| "loss": 0.7168, |
| "step": 172 |
| }, |
| { |
| "epoch": 1.0116959064327484, |
| "grad_norm": 0.19949275591479185, |
| "learning_rate": 1.4750542299349242e-05, |
| "loss": 0.6984, |
| "step": 173 |
| }, |
| { |
| "epoch": 1.0175438596491229, |
| "grad_norm": 0.21384886986850865, |
| "learning_rate": 1.470715835140998e-05, |
| "loss": 0.7272, |
| "step": 174 |
| }, |
| { |
| "epoch": 1.023391812865497, |
| "grad_norm": 0.2085869267067017, |
| "learning_rate": 1.4663774403470716e-05, |
| "loss": 0.7044, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.0292397660818713, |
| "grad_norm": 0.20631353790684379, |
| "learning_rate": 1.4620390455531455e-05, |
| "loss": 0.7181, |
| "step": 176 |
| }, |
| { |
| "epoch": 1.0350877192982457, |
| "grad_norm": 0.24538509221900098, |
| "learning_rate": 1.4577006507592192e-05, |
| "loss": 0.7731, |
| "step": 177 |
| }, |
| { |
| "epoch": 1.04093567251462, |
| "grad_norm": 0.23156823897416134, |
| "learning_rate": 1.453362255965293e-05, |
| "loss": 0.7129, |
| "step": 178 |
| }, |
| { |
| "epoch": 1.0467836257309941, |
| "grad_norm": 0.20155082532453575, |
| "learning_rate": 1.4490238611713667e-05, |
| "loss": 0.7037, |
| "step": 179 |
| }, |
| { |
| "epoch": 1.0526315789473684, |
| "grad_norm": 0.19242380310026896, |
| "learning_rate": 1.4446854663774406e-05, |
| "loss": 0.7026, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.0584795321637426, |
| "grad_norm": 0.21376599859201403, |
| "learning_rate": 1.4403470715835141e-05, |
| "loss": 0.7021, |
| "step": 181 |
| }, |
| { |
| "epoch": 1.064327485380117, |
| "grad_norm": 0.21321842835439078, |
| "learning_rate": 1.436008676789588e-05, |
| "loss": 0.7186, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.0701754385964912, |
| "grad_norm": 0.23152992175479814, |
| "learning_rate": 1.4316702819956618e-05, |
| "loss": 0.7262, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.0760233918128654, |
| "grad_norm": 0.20707778685395156, |
| "learning_rate": 1.4273318872017355e-05, |
| "loss": 0.742, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.0818713450292399, |
| "grad_norm": 0.21284401184030297, |
| "learning_rate": 1.4229934924078092e-05, |
| "loss": 0.683, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.087719298245614, |
| "grad_norm": 0.21105448131636317, |
| "learning_rate": 1.418655097613883e-05, |
| "loss": 0.7218, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.0935672514619883, |
| "grad_norm": 0.23854659151648439, |
| "learning_rate": 1.4143167028199567e-05, |
| "loss": 0.707, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.0994152046783625, |
| "grad_norm": 0.1979900232322942, |
| "learning_rate": 1.4099783080260306e-05, |
| "loss": 0.6793, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.1052631578947367, |
| "grad_norm": 0.19940118749588795, |
| "learning_rate": 1.4056399132321041e-05, |
| "loss": 0.6793, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.1111111111111112, |
| "grad_norm": 0.2216608207413802, |
| "learning_rate": 1.401301518438178e-05, |
| "loss": 0.7183, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.1169590643274854, |
| "grad_norm": 0.19705996044476262, |
| "learning_rate": 1.3969631236442517e-05, |
| "loss": 0.692, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.1228070175438596, |
| "grad_norm": 0.18840081658391272, |
| "learning_rate": 1.3926247288503255e-05, |
| "loss": 0.69, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.128654970760234, |
| "grad_norm": 0.22778993399760028, |
| "learning_rate": 1.3882863340563992e-05, |
| "loss": 0.7458, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.1345029239766082, |
| "grad_norm": 0.19922962343898284, |
| "learning_rate": 1.3839479392624731e-05, |
| "loss": 0.6935, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.1403508771929824, |
| "grad_norm": 0.17961965737395658, |
| "learning_rate": 1.3796095444685466e-05, |
| "loss": 0.6902, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.1461988304093567, |
| "grad_norm": 0.20117480573787744, |
| "learning_rate": 1.3752711496746205e-05, |
| "loss": 0.6966, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.1520467836257309, |
| "grad_norm": 0.20576287270564314, |
| "learning_rate": 1.3709327548806943e-05, |
| "loss": 0.6626, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.1578947368421053, |
| "grad_norm": 0.20954364596102132, |
| "learning_rate": 1.366594360086768e-05, |
| "loss": 0.712, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.1637426900584795, |
| "grad_norm": 0.18682996007735939, |
| "learning_rate": 1.3622559652928417e-05, |
| "loss": 0.7075, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.1695906432748537, |
| "grad_norm": 0.20043695366127617, |
| "learning_rate": 1.3579175704989156e-05, |
| "loss": 0.688, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.1754385964912282, |
| "grad_norm": 0.19280097802899304, |
| "learning_rate": 1.3535791757049892e-05, |
| "loss": 0.7177, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.1812865497076024, |
| "grad_norm": 0.1857970119964957, |
| "learning_rate": 1.349240780911063e-05, |
| "loss": 0.6463, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.1871345029239766, |
| "grad_norm": 0.1825176976963816, |
| "learning_rate": 1.3449023861171368e-05, |
| "loss": 0.6673, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.1929824561403508, |
| "grad_norm": 0.22051713697050027, |
| "learning_rate": 1.3405639913232105e-05, |
| "loss": 0.7145, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.198830409356725, |
| "grad_norm": 0.18423219666459137, |
| "learning_rate": 1.3362255965292842e-05, |
| "loss": 0.6528, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.2046783625730995, |
| "grad_norm": 0.19618225427002017, |
| "learning_rate": 1.3318872017353581e-05, |
| "loss": 0.6668, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.2105263157894737, |
| "grad_norm": 0.20587148922859191, |
| "learning_rate": 1.3275488069414317e-05, |
| "loss": 0.7067, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.2163742690058479, |
| "grad_norm": 0.2090448851687986, |
| "learning_rate": 1.3232104121475056e-05, |
| "loss": 0.7029, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.2222222222222223, |
| "grad_norm": 0.19626708957217415, |
| "learning_rate": 1.3188720173535795e-05, |
| "loss": 0.6662, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.2280701754385965, |
| "grad_norm": 0.18762036234283117, |
| "learning_rate": 1.314533622559653e-05, |
| "loss": 0.6874, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.2339181286549707, |
| "grad_norm": 0.19417670023667025, |
| "learning_rate": 1.3101952277657268e-05, |
| "loss": 0.6683, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.239766081871345, |
| "grad_norm": 0.20177458796436643, |
| "learning_rate": 1.3058568329718005e-05, |
| "loss": 0.685, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.2456140350877192, |
| "grad_norm": 0.22040877827401095, |
| "learning_rate": 1.3015184381778742e-05, |
| "loss": 0.7254, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.2514619883040936, |
| "grad_norm": 0.19637215780432019, |
| "learning_rate": 1.2971800433839481e-05, |
| "loss": 0.6897, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.2573099415204678, |
| "grad_norm": 0.199110748095673, |
| "learning_rate": 1.2928416485900217e-05, |
| "loss": 0.6854, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.263157894736842, |
| "grad_norm": 0.21819712890299467, |
| "learning_rate": 1.2885032537960956e-05, |
| "loss": 0.6986, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.2690058479532165, |
| "grad_norm": 0.21142557814635124, |
| "learning_rate": 1.2841648590021693e-05, |
| "loss": 0.7203, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.2748538011695907, |
| "grad_norm": 0.18250187399866635, |
| "learning_rate": 1.279826464208243e-05, |
| "loss": 0.6785, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.280701754385965, |
| "grad_norm": 0.19755959536466466, |
| "learning_rate": 1.2754880694143167e-05, |
| "loss": 0.6706, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.286549707602339, |
| "grad_norm": 0.19529246308103604, |
| "learning_rate": 1.2711496746203906e-05, |
| "loss": 0.6998, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.2923976608187133, |
| "grad_norm": 0.1936160811683211, |
| "learning_rate": 1.2668112798264642e-05, |
| "loss": 0.6896, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.2982456140350878, |
| "grad_norm": 0.1845218398315034, |
| "learning_rate": 1.262472885032538e-05, |
| "loss": 0.6568, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.304093567251462, |
| "grad_norm": 0.20772884369385505, |
| "learning_rate": 1.258134490238612e-05, |
| "loss": 0.6625, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.3099415204678362, |
| "grad_norm": 0.229042568059284, |
| "learning_rate": 1.2537960954446855e-05, |
| "loss": 0.6861, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.3157894736842106, |
| "grad_norm": 0.20350171741172374, |
| "learning_rate": 1.2494577006507593e-05, |
| "loss": 0.6478, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.3216374269005848, |
| "grad_norm": 0.19144221585747292, |
| "learning_rate": 1.2451193058568331e-05, |
| "loss": 0.6764, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.327485380116959, |
| "grad_norm": 0.21913738701924326, |
| "learning_rate": 1.2407809110629067e-05, |
| "loss": 0.695, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 0.2020711158267139, |
| "learning_rate": 1.2364425162689806e-05, |
| "loss": 0.7062, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.3391812865497075, |
| "grad_norm": 0.21899620359258645, |
| "learning_rate": 1.2321041214750545e-05, |
| "loss": 0.7145, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.345029239766082, |
| "grad_norm": 0.18931923720637447, |
| "learning_rate": 1.227765726681128e-05, |
| "loss": 0.6956, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.3508771929824561, |
| "grad_norm": 0.1916810843880607, |
| "learning_rate": 1.223427331887202e-05, |
| "loss": 0.668, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.3567251461988303, |
| "grad_norm": 0.19261705533668297, |
| "learning_rate": 1.2190889370932757e-05, |
| "loss": 0.653, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.3625730994152048, |
| "grad_norm": 0.20814835626639575, |
| "learning_rate": 1.2147505422993492e-05, |
| "loss": 0.7068, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.368421052631579, |
| "grad_norm": 0.2076525513781835, |
| "learning_rate": 1.2104121475054231e-05, |
| "loss": 0.6989, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.3742690058479532, |
| "grad_norm": 0.1911948741286201, |
| "learning_rate": 1.2060737527114967e-05, |
| "loss": 0.6774, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.3801169590643274, |
| "grad_norm": 0.2100123955547407, |
| "learning_rate": 1.2017353579175706e-05, |
| "loss": 0.6997, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.3859649122807016, |
| "grad_norm": 0.31584573390504456, |
| "learning_rate": 1.1973969631236445e-05, |
| "loss": 0.7052, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.391812865497076, |
| "grad_norm": 0.18688166233524203, |
| "learning_rate": 1.193058568329718e-05, |
| "loss": 0.6526, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.3976608187134503, |
| "grad_norm": 0.22026356851753442, |
| "learning_rate": 1.1887201735357918e-05, |
| "loss": 0.6454, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.4035087719298245, |
| "grad_norm": 0.19323076025261185, |
| "learning_rate": 1.1843817787418656e-05, |
| "loss": 0.6594, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.409356725146199, |
| "grad_norm": 0.19902277064282112, |
| "learning_rate": 1.1800433839479392e-05, |
| "loss": 0.7244, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.4152046783625731, |
| "grad_norm": 0.1908671762046153, |
| "learning_rate": 1.1757049891540131e-05, |
| "loss": 0.6681, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.4210526315789473, |
| "grad_norm": 0.19560133699568794, |
| "learning_rate": 1.171366594360087e-05, |
| "loss": 0.6731, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.4269005847953216, |
| "grad_norm": 0.2094438443568091, |
| "learning_rate": 1.1670281995661605e-05, |
| "loss": 0.701, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.4327485380116958, |
| "grad_norm": 0.19053569086952576, |
| "learning_rate": 1.1626898047722344e-05, |
| "loss": 0.6104, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.4385964912280702, |
| "grad_norm": 0.19913609339747246, |
| "learning_rate": 1.1583514099783082e-05, |
| "loss": 0.6573, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.4444444444444444, |
| "grad_norm": 0.20956879358597585, |
| "learning_rate": 1.1540130151843817e-05, |
| "loss": 0.662, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.4502923976608186, |
| "grad_norm": 0.20216430625120646, |
| "learning_rate": 1.1496746203904556e-05, |
| "loss": 0.6505, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.456140350877193, |
| "grad_norm": 0.2061734262125184, |
| "learning_rate": 1.1453362255965295e-05, |
| "loss": 0.6786, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.4619883040935673, |
| "grad_norm": 0.22574876209542377, |
| "learning_rate": 1.140997830802603e-05, |
| "loss": 0.7325, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.4678362573099415, |
| "grad_norm": 0.1772735034592302, |
| "learning_rate": 1.136659436008677e-05, |
| "loss": 0.636, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.4736842105263157, |
| "grad_norm": 0.2073376585966582, |
| "learning_rate": 1.1323210412147507e-05, |
| "loss": 0.6791, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.47953216374269, |
| "grad_norm": 0.18918995778508665, |
| "learning_rate": 1.1279826464208244e-05, |
| "loss": 0.6406, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.4853801169590644, |
| "grad_norm": 0.20195402902912296, |
| "learning_rate": 1.1236442516268981e-05, |
| "loss": 0.6625, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.4912280701754386, |
| "grad_norm": 0.18582829458374092, |
| "learning_rate": 1.119305856832972e-05, |
| "loss": 0.6831, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.4970760233918128, |
| "grad_norm": 0.18667034513926425, |
| "learning_rate": 1.1149674620390456e-05, |
| "loss": 0.6819, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.5029239766081872, |
| "grad_norm": 0.1884977125227984, |
| "learning_rate": 1.1106290672451195e-05, |
| "loss": 0.6515, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.5087719298245614, |
| "grad_norm": 0.19917650464796147, |
| "learning_rate": 1.1062906724511932e-05, |
| "loss": 0.672, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.5146198830409356, |
| "grad_norm": 0.20496434407592237, |
| "learning_rate": 1.101952277657267e-05, |
| "loss": 0.6538, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.52046783625731, |
| "grad_norm": 0.18169707048812828, |
| "learning_rate": 1.0976138828633407e-05, |
| "loss": 0.661, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.526315789473684, |
| "grad_norm": 0.22056891228087572, |
| "learning_rate": 1.0932754880694142e-05, |
| "loss": 0.6929, |
| "step": 261 |
| }, |
| { |
| "epoch": 1.5321637426900585, |
| "grad_norm": 0.2085232928793704, |
| "learning_rate": 1.0889370932754881e-05, |
| "loss": 0.6954, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.5380116959064327, |
| "grad_norm": 0.20789260798479195, |
| "learning_rate": 1.084598698481562e-05, |
| "loss": 0.7011, |
| "step": 263 |
| }, |
| { |
| "epoch": 1.543859649122807, |
| "grad_norm": 0.1849807776906847, |
| "learning_rate": 1.0802603036876356e-05, |
| "loss": 0.686, |
| "step": 264 |
| }, |
| { |
| "epoch": 1.5497076023391814, |
| "grad_norm": 0.18518274667657642, |
| "learning_rate": 1.0759219088937095e-05, |
| "loss": 0.6636, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.5555555555555556, |
| "grad_norm": 0.19333183404204385, |
| "learning_rate": 1.0715835140997832e-05, |
| "loss": 0.7133, |
| "step": 266 |
| }, |
| { |
| "epoch": 1.5614035087719298, |
| "grad_norm": 0.19922629243071752, |
| "learning_rate": 1.0672451193058569e-05, |
| "loss": 0.6745, |
| "step": 267 |
| }, |
| { |
| "epoch": 1.5672514619883042, |
| "grad_norm": 0.1895519362467185, |
| "learning_rate": 1.0629067245119306e-05, |
| "loss": 0.6648, |
| "step": 268 |
| }, |
| { |
| "epoch": 1.5730994152046782, |
| "grad_norm": 0.1871315579144127, |
| "learning_rate": 1.0585683297180045e-05, |
| "loss": 0.6721, |
| "step": 269 |
| }, |
| { |
| "epoch": 1.5789473684210527, |
| "grad_norm": 0.18380449023430315, |
| "learning_rate": 1.0542299349240781e-05, |
| "loss": 0.6697, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.5847953216374269, |
| "grad_norm": 0.2562545867136886, |
| "learning_rate": 1.049891540130152e-05, |
| "loss": 0.6969, |
| "step": 271 |
| }, |
| { |
| "epoch": 1.590643274853801, |
| "grad_norm": 0.1740952081571547, |
| "learning_rate": 1.0455531453362257e-05, |
| "loss": 0.6282, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.5964912280701755, |
| "grad_norm": 0.1800104491661315, |
| "learning_rate": 1.0412147505422994e-05, |
| "loss": 0.666, |
| "step": 273 |
| }, |
| { |
| "epoch": 1.6023391812865497, |
| "grad_norm": 0.21004999295392382, |
| "learning_rate": 1.0368763557483732e-05, |
| "loss": 0.6849, |
| "step": 274 |
| }, |
| { |
| "epoch": 1.608187134502924, |
| "grad_norm": 0.1787656466284205, |
| "learning_rate": 1.032537960954447e-05, |
| "loss": 0.6723, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.6140350877192984, |
| "grad_norm": 0.21871948943154398, |
| "learning_rate": 1.0281995661605206e-05, |
| "loss": 0.6901, |
| "step": 276 |
| }, |
| { |
| "epoch": 1.6198830409356724, |
| "grad_norm": 0.18361595886864504, |
| "learning_rate": 1.0238611713665945e-05, |
| "loss": 0.6421, |
| "step": 277 |
| }, |
| { |
| "epoch": 1.6257309941520468, |
| "grad_norm": 0.18916065927378428, |
| "learning_rate": 1.0195227765726682e-05, |
| "loss": 0.6511, |
| "step": 278 |
| }, |
| { |
| "epoch": 1.631578947368421, |
| "grad_norm": 0.1979017696376173, |
| "learning_rate": 1.015184381778742e-05, |
| "loss": 0.7018, |
| "step": 279 |
| }, |
| { |
| "epoch": 1.6374269005847952, |
| "grad_norm": 0.18969323017848436, |
| "learning_rate": 1.0108459869848157e-05, |
| "loss": 0.6677, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.6432748538011697, |
| "grad_norm": 0.18594561560924552, |
| "learning_rate": 1.0065075921908896e-05, |
| "loss": 0.6584, |
| "step": 281 |
| }, |
| { |
| "epoch": 1.6491228070175439, |
| "grad_norm": 0.17998449840117228, |
| "learning_rate": 1.0021691973969631e-05, |
| "loss": 0.6616, |
| "step": 282 |
| }, |
| { |
| "epoch": 1.654970760233918, |
| "grad_norm": 0.18463090829340062, |
| "learning_rate": 9.97830802603037e-06, |
| "loss": 0.6718, |
| "step": 283 |
| }, |
| { |
| "epoch": 1.6608187134502925, |
| "grad_norm": 0.1941128688320993, |
| "learning_rate": 9.934924078091108e-06, |
| "loss": 0.7071, |
| "step": 284 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 0.19238570224026413, |
| "learning_rate": 9.891540130151845e-06, |
| "loss": 0.6732, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.672514619883041, |
| "grad_norm": 0.19518291083148756, |
| "learning_rate": 9.848156182212582e-06, |
| "loss": 0.6761, |
| "step": 286 |
| }, |
| { |
| "epoch": 1.6783625730994152, |
| "grad_norm": 0.18202289710684016, |
| "learning_rate": 9.80477223427332e-06, |
| "loss": 0.6577, |
| "step": 287 |
| }, |
| { |
| "epoch": 1.6842105263157894, |
| "grad_norm": 0.18625725493648193, |
| "learning_rate": 9.761388286334057e-06, |
| "loss": 0.7017, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.6900584795321638, |
| "grad_norm": 0.2100099826676321, |
| "learning_rate": 9.718004338394794e-06, |
| "loss": 0.6858, |
| "step": 289 |
| }, |
| { |
| "epoch": 1.695906432748538, |
| "grad_norm": 0.2084953033980061, |
| "learning_rate": 9.674620390455533e-06, |
| "loss": 0.6674, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.7017543859649122, |
| "grad_norm": 0.18596234796611538, |
| "learning_rate": 9.63123644251627e-06, |
| "loss": 0.6496, |
| "step": 291 |
| }, |
| { |
| "epoch": 1.7076023391812867, |
| "grad_norm": 0.1978635671319887, |
| "learning_rate": 9.587852494577007e-06, |
| "loss": 0.6642, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.7134502923976607, |
| "grad_norm": 0.1979317376200934, |
| "learning_rate": 9.544468546637745e-06, |
| "loss": 0.7282, |
| "step": 293 |
| }, |
| { |
| "epoch": 1.719298245614035, |
| "grad_norm": 0.1792470289825809, |
| "learning_rate": 9.501084598698482e-06, |
| "loss": 0.661, |
| "step": 294 |
| }, |
| { |
| "epoch": 1.7251461988304093, |
| "grad_norm": 0.18979635817761115, |
| "learning_rate": 9.457700650759219e-06, |
| "loss": 0.6911, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.7309941520467835, |
| "grad_norm": 0.21977929643672667, |
| "learning_rate": 9.414316702819958e-06, |
| "loss": 0.6636, |
| "step": 296 |
| }, |
| { |
| "epoch": 1.736842105263158, |
| "grad_norm": 0.19142793950578896, |
| "learning_rate": 9.370932754880695e-06, |
| "loss": 0.6652, |
| "step": 297 |
| }, |
| { |
| "epoch": 1.7426900584795322, |
| "grad_norm": 0.1872314527946603, |
| "learning_rate": 9.327548806941433e-06, |
| "loss": 0.7241, |
| "step": 298 |
| }, |
| { |
| "epoch": 1.7485380116959064, |
| "grad_norm": 0.21294716763423086, |
| "learning_rate": 9.28416485900217e-06, |
| "loss": 0.6505, |
| "step": 299 |
| }, |
| { |
| "epoch": 1.7543859649122808, |
| "grad_norm": 0.20627848491038323, |
| "learning_rate": 9.240780911062907e-06, |
| "loss": 0.6839, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.7602339181286548, |
| "grad_norm": 0.1913775128261492, |
| "learning_rate": 9.197396963123644e-06, |
| "loss": 0.7072, |
| "step": 301 |
| }, |
| { |
| "epoch": 1.7660818713450293, |
| "grad_norm": 0.18287999729259147, |
| "learning_rate": 9.154013015184382e-06, |
| "loss": 0.6571, |
| "step": 302 |
| }, |
| { |
| "epoch": 1.7719298245614035, |
| "grad_norm": 0.1743048128512118, |
| "learning_rate": 9.11062906724512e-06, |
| "loss": 0.6404, |
| "step": 303 |
| }, |
| { |
| "epoch": 1.7777777777777777, |
| "grad_norm": 0.18832199972802494, |
| "learning_rate": 9.067245119305858e-06, |
| "loss": 0.6853, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.7836257309941521, |
| "grad_norm": 0.20655204935711033, |
| "learning_rate": 9.023861171366595e-06, |
| "loss": 0.7093, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.7894736842105263, |
| "grad_norm": 0.19209280973506734, |
| "learning_rate": 8.980477223427332e-06, |
| "loss": 0.6548, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.7953216374269005, |
| "grad_norm": 0.1885931981782652, |
| "learning_rate": 8.93709327548807e-06, |
| "loss": 0.6558, |
| "step": 307 |
| }, |
| { |
| "epoch": 1.801169590643275, |
| "grad_norm": 0.1962953890386984, |
| "learning_rate": 8.893709327548807e-06, |
| "loss": 0.6586, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.807017543859649, |
| "grad_norm": 0.19945775782899686, |
| "learning_rate": 8.850325379609546e-06, |
| "loss": 0.6636, |
| "step": 309 |
| }, |
| { |
| "epoch": 1.8128654970760234, |
| "grad_norm": 0.1941326419111805, |
| "learning_rate": 8.806941431670283e-06, |
| "loss": 0.6615, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.8187134502923976, |
| "grad_norm": 0.18927283838641645, |
| "learning_rate": 8.76355748373102e-06, |
| "loss": 0.6722, |
| "step": 311 |
| }, |
| { |
| "epoch": 1.8245614035087718, |
| "grad_norm": 0.18432693872655953, |
| "learning_rate": 8.720173535791757e-06, |
| "loss": 0.6522, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.8304093567251463, |
| "grad_norm": 0.1971710237782894, |
| "learning_rate": 8.676789587852495e-06, |
| "loss": 0.6996, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.8362573099415205, |
| "grad_norm": 0.1809013320142788, |
| "learning_rate": 8.633405639913232e-06, |
| "loss": 0.6476, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.8421052631578947, |
| "grad_norm": 0.17273470066786814, |
| "learning_rate": 8.59002169197397e-06, |
| "loss": 0.6205, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.8479532163742691, |
| "grad_norm": 0.1973244932241699, |
| "learning_rate": 8.546637744034708e-06, |
| "loss": 0.7028, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.8538011695906431, |
| "grad_norm": 0.18443943998865936, |
| "learning_rate": 8.503253796095445e-06, |
| "loss": 0.6821, |
| "step": 317 |
| }, |
| { |
| "epoch": 1.8596491228070176, |
| "grad_norm": 0.19742863809842442, |
| "learning_rate": 8.459869848156183e-06, |
| "loss": 0.696, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.8654970760233918, |
| "grad_norm": 0.19602002536800328, |
| "learning_rate": 8.416485900216922e-06, |
| "loss": 0.6643, |
| "step": 319 |
| }, |
| { |
| "epoch": 1.871345029239766, |
| "grad_norm": 0.18322608246185332, |
| "learning_rate": 8.373101952277657e-06, |
| "loss": 0.6877, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.8771929824561404, |
| "grad_norm": 0.20312293700355982, |
| "learning_rate": 8.329718004338394e-06, |
| "loss": 0.6779, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.8830409356725146, |
| "grad_norm": 0.18955838414122606, |
| "learning_rate": 8.286334056399133e-06, |
| "loss": 0.6889, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.8888888888888888, |
| "grad_norm": 0.18608685857531174, |
| "learning_rate": 8.24295010845987e-06, |
| "loss": 0.7066, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.8947368421052633, |
| "grad_norm": 0.19324997721387963, |
| "learning_rate": 8.199566160520608e-06, |
| "loss": 0.6721, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.9005847953216373, |
| "grad_norm": 0.18269525520661356, |
| "learning_rate": 8.156182212581345e-06, |
| "loss": 0.6606, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.9064327485380117, |
| "grad_norm": 0.17879213689825307, |
| "learning_rate": 8.112798264642084e-06, |
| "loss": 0.6195, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.912280701754386, |
| "grad_norm": 0.19572563149944922, |
| "learning_rate": 8.06941431670282e-06, |
| "loss": 0.6553, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.9181286549707601, |
| "grad_norm": 0.20455740497972336, |
| "learning_rate": 8.026030368763557e-06, |
| "loss": 0.7073, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.9239766081871346, |
| "grad_norm": 0.20379817717927606, |
| "learning_rate": 7.982646420824296e-06, |
| "loss": 0.6656, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.9298245614035088, |
| "grad_norm": 0.18816989178876325, |
| "learning_rate": 7.939262472885033e-06, |
| "loss": 0.6599, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.935672514619883, |
| "grad_norm": 0.19040798822146188, |
| "learning_rate": 7.89587852494577e-06, |
| "loss": 0.6872, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.9415204678362574, |
| "grad_norm": 0.2060421681157549, |
| "learning_rate": 7.85249457700651e-06, |
| "loss": 0.6634, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.9473684210526314, |
| "grad_norm": 0.1841817001629427, |
| "learning_rate": 7.809110629067247e-06, |
| "loss": 0.6249, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.9532163742690059, |
| "grad_norm": 0.19185741242924698, |
| "learning_rate": 7.765726681127982e-06, |
| "loss": 0.6603, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.95906432748538, |
| "grad_norm": 0.17490775565813746, |
| "learning_rate": 7.722342733188721e-06, |
| "loss": 0.649, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.9649122807017543, |
| "grad_norm": 0.18154097192716664, |
| "learning_rate": 7.678958785249458e-06, |
| "loss": 0.6869, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.9707602339181287, |
| "grad_norm": 0.2171151900817146, |
| "learning_rate": 7.635574837310196e-06, |
| "loss": 0.6806, |
| "step": 337 |
| }, |
| { |
| "epoch": 1.976608187134503, |
| "grad_norm": 0.20056475561893633, |
| "learning_rate": 7.592190889370933e-06, |
| "loss": 0.6143, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.9824561403508771, |
| "grad_norm": 0.1859196448723673, |
| "learning_rate": 7.548806941431671e-06, |
| "loss": 0.6565, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.9883040935672516, |
| "grad_norm": 0.18291788926738473, |
| "learning_rate": 7.505422993492408e-06, |
| "loss": 0.6656, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.9941520467836256, |
| "grad_norm": 0.1851247551589902, |
| "learning_rate": 7.4620390455531455e-06, |
| "loss": 0.658, |
| "step": 341 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.19091041161918318, |
| "learning_rate": 7.418655097613884e-06, |
| "loss": 0.675, |
| "step": 342 |
| }, |
| { |
| "epoch": 2.0058479532163744, |
| "grad_norm": 0.18034530461971127, |
| "learning_rate": 7.375271149674621e-06, |
| "loss": 0.6371, |
| "step": 343 |
| }, |
| { |
| "epoch": 2.0116959064327484, |
| "grad_norm": 0.17929321132321624, |
| "learning_rate": 7.331887201735358e-06, |
| "loss": 0.6238, |
| "step": 344 |
| }, |
| { |
| "epoch": 2.017543859649123, |
| "grad_norm": 0.1805743053336667, |
| "learning_rate": 7.288503253796096e-06, |
| "loss": 0.676, |
| "step": 345 |
| }, |
| { |
| "epoch": 2.023391812865497, |
| "grad_norm": 0.18134202268639932, |
| "learning_rate": 7.2451193058568335e-06, |
| "loss": 0.6926, |
| "step": 346 |
| }, |
| { |
| "epoch": 2.0292397660818713, |
| "grad_norm": 0.16664489258040083, |
| "learning_rate": 7.201735357917571e-06, |
| "loss": 0.635, |
| "step": 347 |
| }, |
| { |
| "epoch": 2.0350877192982457, |
| "grad_norm": 0.17418680651725119, |
| "learning_rate": 7.158351409978309e-06, |
| "loss": 0.6625, |
| "step": 348 |
| }, |
| { |
| "epoch": 2.0409356725146197, |
| "grad_norm": 0.16806000135863103, |
| "learning_rate": 7.114967462039046e-06, |
| "loss": 0.656, |
| "step": 349 |
| }, |
| { |
| "epoch": 2.046783625730994, |
| "grad_norm": 0.1766385026508446, |
| "learning_rate": 7.071583514099783e-06, |
| "loss": 0.644, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.0526315789473686, |
| "grad_norm": 0.18299281472851398, |
| "learning_rate": 7.028199566160521e-06, |
| "loss": 0.6609, |
| "step": 351 |
| }, |
| { |
| "epoch": 2.0584795321637426, |
| "grad_norm": 0.20986189876178032, |
| "learning_rate": 6.984815618221259e-06, |
| "loss": 0.6113, |
| "step": 352 |
| }, |
| { |
| "epoch": 2.064327485380117, |
| "grad_norm": 0.17241912699938555, |
| "learning_rate": 6.941431670281996e-06, |
| "loss": 0.622, |
| "step": 353 |
| }, |
| { |
| "epoch": 2.0701754385964914, |
| "grad_norm": 0.17175110508577335, |
| "learning_rate": 6.898047722342733e-06, |
| "loss": 0.6475, |
| "step": 354 |
| }, |
| { |
| "epoch": 2.0760233918128654, |
| "grad_norm": 0.17952837380953865, |
| "learning_rate": 6.854663774403471e-06, |
| "loss": 0.624, |
| "step": 355 |
| }, |
| { |
| "epoch": 2.08187134502924, |
| "grad_norm": 0.16440737350129503, |
| "learning_rate": 6.8112798264642086e-06, |
| "loss": 0.6216, |
| "step": 356 |
| }, |
| { |
| "epoch": 2.087719298245614, |
| "grad_norm": 0.19647840255348978, |
| "learning_rate": 6.767895878524946e-06, |
| "loss": 0.6685, |
| "step": 357 |
| }, |
| { |
| "epoch": 2.0935672514619883, |
| "grad_norm": 0.1696642474859097, |
| "learning_rate": 6.724511930585684e-06, |
| "loss": 0.6513, |
| "step": 358 |
| }, |
| { |
| "epoch": 2.0994152046783627, |
| "grad_norm": 0.16781192390446642, |
| "learning_rate": 6.681127982646421e-06, |
| "loss": 0.6316, |
| "step": 359 |
| }, |
| { |
| "epoch": 2.1052631578947367, |
| "grad_norm": 0.17665396661182975, |
| "learning_rate": 6.6377440347071584e-06, |
| "loss": 0.6444, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.111111111111111, |
| "grad_norm": 0.17026024356498806, |
| "learning_rate": 6.594360086767897e-06, |
| "loss": 0.6369, |
| "step": 361 |
| }, |
| { |
| "epoch": 2.116959064327485, |
| "grad_norm": 0.1771238959431666, |
| "learning_rate": 6.550976138828634e-06, |
| "loss": 0.6363, |
| "step": 362 |
| }, |
| { |
| "epoch": 2.1228070175438596, |
| "grad_norm": 0.18074195829403725, |
| "learning_rate": 6.507592190889371e-06, |
| "loss": 0.6295, |
| "step": 363 |
| }, |
| { |
| "epoch": 2.128654970760234, |
| "grad_norm": 0.17590315483807462, |
| "learning_rate": 6.464208242950108e-06, |
| "loss": 0.6352, |
| "step": 364 |
| }, |
| { |
| "epoch": 2.134502923976608, |
| "grad_norm": 0.1833679378524948, |
| "learning_rate": 6.420824295010846e-06, |
| "loss": 0.668, |
| "step": 365 |
| }, |
| { |
| "epoch": 2.1403508771929824, |
| "grad_norm": 0.17426945543091085, |
| "learning_rate": 6.377440347071584e-06, |
| "loss": 0.6309, |
| "step": 366 |
| }, |
| { |
| "epoch": 2.146198830409357, |
| "grad_norm": 0.17558570852982017, |
| "learning_rate": 6.334056399132321e-06, |
| "loss": 0.6183, |
| "step": 367 |
| }, |
| { |
| "epoch": 2.152046783625731, |
| "grad_norm": 0.18869020603808476, |
| "learning_rate": 6.29067245119306e-06, |
| "loss": 0.6743, |
| "step": 368 |
| }, |
| { |
| "epoch": 2.1578947368421053, |
| "grad_norm": 0.16860328391840887, |
| "learning_rate": 6.247288503253796e-06, |
| "loss": 0.6272, |
| "step": 369 |
| }, |
| { |
| "epoch": 2.1637426900584797, |
| "grad_norm": 0.1787201818661304, |
| "learning_rate": 6.2039045553145335e-06, |
| "loss": 0.6536, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.1695906432748537, |
| "grad_norm": 0.17123056998213806, |
| "learning_rate": 6.1605206073752725e-06, |
| "loss": 0.6382, |
| "step": 371 |
| }, |
| { |
| "epoch": 2.175438596491228, |
| "grad_norm": 0.1687316996284582, |
| "learning_rate": 6.11713665943601e-06, |
| "loss": 0.6212, |
| "step": 372 |
| }, |
| { |
| "epoch": 2.181286549707602, |
| "grad_norm": 0.1891269844696612, |
| "learning_rate": 6.073752711496746e-06, |
| "loss": 0.6585, |
| "step": 373 |
| }, |
| { |
| "epoch": 2.1871345029239766, |
| "grad_norm": 0.1725455615706422, |
| "learning_rate": 6.030368763557483e-06, |
| "loss": 0.6559, |
| "step": 374 |
| }, |
| { |
| "epoch": 2.192982456140351, |
| "grad_norm": 0.16915435536877974, |
| "learning_rate": 5.986984815618222e-06, |
| "loss": 0.6324, |
| "step": 375 |
| }, |
| { |
| "epoch": 2.198830409356725, |
| "grad_norm": 0.17215684923648952, |
| "learning_rate": 5.943600867678959e-06, |
| "loss": 0.6539, |
| "step": 376 |
| }, |
| { |
| "epoch": 2.2046783625730995, |
| "grad_norm": 0.1954313719903045, |
| "learning_rate": 5.900216919739696e-06, |
| "loss": 0.6866, |
| "step": 377 |
| }, |
| { |
| "epoch": 2.2105263157894735, |
| "grad_norm": 0.17042598764998235, |
| "learning_rate": 5.856832971800435e-06, |
| "loss": 0.6558, |
| "step": 378 |
| }, |
| { |
| "epoch": 2.216374269005848, |
| "grad_norm": 0.17192364297534282, |
| "learning_rate": 5.813449023861172e-06, |
| "loss": 0.6378, |
| "step": 379 |
| }, |
| { |
| "epoch": 2.2222222222222223, |
| "grad_norm": 0.1739599234963019, |
| "learning_rate": 5.770065075921909e-06, |
| "loss": 0.6194, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.2280701754385963, |
| "grad_norm": 0.17013107466272653, |
| "learning_rate": 5.7266811279826476e-06, |
| "loss": 0.6071, |
| "step": 381 |
| }, |
| { |
| "epoch": 2.2339181286549707, |
| "grad_norm": 0.1848300211606863, |
| "learning_rate": 5.683297180043385e-06, |
| "loss": 0.6859, |
| "step": 382 |
| }, |
| { |
| "epoch": 2.239766081871345, |
| "grad_norm": 0.17752768182741563, |
| "learning_rate": 5.639913232104122e-06, |
| "loss": 0.6674, |
| "step": 383 |
| }, |
| { |
| "epoch": 2.245614035087719, |
| "grad_norm": 0.17268014916608854, |
| "learning_rate": 5.59652928416486e-06, |
| "loss": 0.6447, |
| "step": 384 |
| }, |
| { |
| "epoch": 2.2514619883040936, |
| "grad_norm": 0.1975248493024482, |
| "learning_rate": 5.5531453362255974e-06, |
| "loss": 0.6877, |
| "step": 385 |
| }, |
| { |
| "epoch": 2.257309941520468, |
| "grad_norm": 0.1854455256428647, |
| "learning_rate": 5.509761388286335e-06, |
| "loss": 0.6663, |
| "step": 386 |
| }, |
| { |
| "epoch": 2.263157894736842, |
| "grad_norm": 0.18048830972034413, |
| "learning_rate": 5.466377440347071e-06, |
| "loss": 0.6515, |
| "step": 387 |
| }, |
| { |
| "epoch": 2.2690058479532165, |
| "grad_norm": 0.18529428469214002, |
| "learning_rate": 5.42299349240781e-06, |
| "loss": 0.6742, |
| "step": 388 |
| }, |
| { |
| "epoch": 2.2748538011695905, |
| "grad_norm": 0.1953029253712016, |
| "learning_rate": 5.379609544468547e-06, |
| "loss": 0.6715, |
| "step": 389 |
| }, |
| { |
| "epoch": 2.280701754385965, |
| "grad_norm": 0.18506576413704273, |
| "learning_rate": 5.3362255965292846e-06, |
| "loss": 0.6441, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.2865497076023393, |
| "grad_norm": 0.20519359995385428, |
| "learning_rate": 5.292841648590023e-06, |
| "loss": 0.6324, |
| "step": 391 |
| }, |
| { |
| "epoch": 2.2923976608187133, |
| "grad_norm": 0.1812910105371836, |
| "learning_rate": 5.24945770065076e-06, |
| "loss": 0.6151, |
| "step": 392 |
| }, |
| { |
| "epoch": 2.2982456140350878, |
| "grad_norm": 0.16615863932290006, |
| "learning_rate": 5.206073752711497e-06, |
| "loss": 0.6178, |
| "step": 393 |
| }, |
| { |
| "epoch": 2.3040935672514617, |
| "grad_norm": 0.1867312948806079, |
| "learning_rate": 5.162689804772235e-06, |
| "loss": 0.6844, |
| "step": 394 |
| }, |
| { |
| "epoch": 2.309941520467836, |
| "grad_norm": 0.17482246796590165, |
| "learning_rate": 5.1193058568329725e-06, |
| "loss": 0.6316, |
| "step": 395 |
| }, |
| { |
| "epoch": 2.3157894736842106, |
| "grad_norm": 0.18919167148846638, |
| "learning_rate": 5.07592190889371e-06, |
| "loss": 0.6929, |
| "step": 396 |
| }, |
| { |
| "epoch": 2.3216374269005846, |
| "grad_norm": 0.17135779399399315, |
| "learning_rate": 5.032537960954448e-06, |
| "loss": 0.6507, |
| "step": 397 |
| }, |
| { |
| "epoch": 2.327485380116959, |
| "grad_norm": 0.16589752923541318, |
| "learning_rate": 4.989154013015185e-06, |
| "loss": 0.6169, |
| "step": 398 |
| }, |
| { |
| "epoch": 2.3333333333333335, |
| "grad_norm": 0.17836212191167625, |
| "learning_rate": 4.945770065075922e-06, |
| "loss": 0.6534, |
| "step": 399 |
| }, |
| { |
| "epoch": 2.3391812865497075, |
| "grad_norm": 0.17486989043138282, |
| "learning_rate": 4.90238611713666e-06, |
| "loss": 0.6229, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.345029239766082, |
| "grad_norm": 0.18358705375708667, |
| "learning_rate": 4.859002169197397e-06, |
| "loss": 0.6806, |
| "step": 401 |
| }, |
| { |
| "epoch": 2.3508771929824563, |
| "grad_norm": 0.17755890153992399, |
| "learning_rate": 4.815618221258135e-06, |
| "loss": 0.6835, |
| "step": 402 |
| }, |
| { |
| "epoch": 2.3567251461988303, |
| "grad_norm": 0.1796432140151646, |
| "learning_rate": 4.772234273318872e-06, |
| "loss": 0.6643, |
| "step": 403 |
| }, |
| { |
| "epoch": 2.3625730994152048, |
| "grad_norm": 0.16924652263187123, |
| "learning_rate": 4.7288503253796095e-06, |
| "loss": 0.6157, |
| "step": 404 |
| }, |
| { |
| "epoch": 2.3684210526315788, |
| "grad_norm": 0.1726890776222668, |
| "learning_rate": 4.685466377440348e-06, |
| "loss": 0.6557, |
| "step": 405 |
| }, |
| { |
| "epoch": 2.374269005847953, |
| "grad_norm": 0.1780550008345725, |
| "learning_rate": 4.642082429501085e-06, |
| "loss": 0.6554, |
| "step": 406 |
| }, |
| { |
| "epoch": 2.3801169590643276, |
| "grad_norm": 0.17128524860089567, |
| "learning_rate": 4.598698481561822e-06, |
| "loss": 0.6768, |
| "step": 407 |
| }, |
| { |
| "epoch": 2.3859649122807016, |
| "grad_norm": 0.16558703660041527, |
| "learning_rate": 4.55531453362256e-06, |
| "loss": 0.6492, |
| "step": 408 |
| }, |
| { |
| "epoch": 2.391812865497076, |
| "grad_norm": 0.17532697429039085, |
| "learning_rate": 4.5119305856832975e-06, |
| "loss": 0.6572, |
| "step": 409 |
| }, |
| { |
| "epoch": 2.39766081871345, |
| "grad_norm": 0.16937166076280238, |
| "learning_rate": 4.468546637744035e-06, |
| "loss": 0.6628, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.4035087719298245, |
| "grad_norm": 0.18312431667652093, |
| "learning_rate": 4.425162689804773e-06, |
| "loss": 0.6477, |
| "step": 411 |
| }, |
| { |
| "epoch": 2.409356725146199, |
| "grad_norm": 0.1695999967647095, |
| "learning_rate": 4.38177874186551e-06, |
| "loss": 0.6603, |
| "step": 412 |
| }, |
| { |
| "epoch": 2.415204678362573, |
| "grad_norm": 0.1720525919637609, |
| "learning_rate": 4.338394793926247e-06, |
| "loss": 0.6452, |
| "step": 413 |
| }, |
| { |
| "epoch": 2.4210526315789473, |
| "grad_norm": 0.16726491752955858, |
| "learning_rate": 4.295010845986985e-06, |
| "loss": 0.6199, |
| "step": 414 |
| }, |
| { |
| "epoch": 2.426900584795322, |
| "grad_norm": 0.2059763044769876, |
| "learning_rate": 4.251626898047723e-06, |
| "loss": 0.6783, |
| "step": 415 |
| }, |
| { |
| "epoch": 2.4327485380116958, |
| "grad_norm": 0.1731042493041403, |
| "learning_rate": 4.208242950108461e-06, |
| "loss": 0.6289, |
| "step": 416 |
| }, |
| { |
| "epoch": 2.43859649122807, |
| "grad_norm": 0.17595716841396303, |
| "learning_rate": 4.164859002169197e-06, |
| "loss": 0.6607, |
| "step": 417 |
| }, |
| { |
| "epoch": 2.4444444444444446, |
| "grad_norm": 0.17232699533316642, |
| "learning_rate": 4.121475054229935e-06, |
| "loss": 0.6203, |
| "step": 418 |
| }, |
| { |
| "epoch": 2.4502923976608186, |
| "grad_norm": 0.17550156147686838, |
| "learning_rate": 4.078091106290673e-06, |
| "loss": 0.6584, |
| "step": 419 |
| }, |
| { |
| "epoch": 2.456140350877193, |
| "grad_norm": 0.18080214333436065, |
| "learning_rate": 4.03470715835141e-06, |
| "loss": 0.6031, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.461988304093567, |
| "grad_norm": 0.18048583412947314, |
| "learning_rate": 3.991323210412148e-06, |
| "loss": 0.6354, |
| "step": 421 |
| }, |
| { |
| "epoch": 2.4678362573099415, |
| "grad_norm": 0.18253929691844767, |
| "learning_rate": 3.947939262472885e-06, |
| "loss": 0.6502, |
| "step": 422 |
| }, |
| { |
| "epoch": 2.473684210526316, |
| "grad_norm": 0.1697304593286738, |
| "learning_rate": 3.904555314533623e-06, |
| "loss": 0.6332, |
| "step": 423 |
| }, |
| { |
| "epoch": 2.47953216374269, |
| "grad_norm": 0.17269048510291535, |
| "learning_rate": 3.8611713665943606e-06, |
| "loss": 0.6095, |
| "step": 424 |
| }, |
| { |
| "epoch": 2.4853801169590644, |
| "grad_norm": 0.16216619960446743, |
| "learning_rate": 3.817787418655098e-06, |
| "loss": 0.6112, |
| "step": 425 |
| }, |
| { |
| "epoch": 2.4912280701754383, |
| "grad_norm": 0.17239216132714047, |
| "learning_rate": 3.7744034707158355e-06, |
| "loss": 0.6715, |
| "step": 426 |
| }, |
| { |
| "epoch": 2.497076023391813, |
| "grad_norm": 0.1715509924251108, |
| "learning_rate": 3.7310195227765728e-06, |
| "loss": 0.6459, |
| "step": 427 |
| }, |
| { |
| "epoch": 2.502923976608187, |
| "grad_norm": 0.1674736258064931, |
| "learning_rate": 3.6876355748373104e-06, |
| "loss": 0.6355, |
| "step": 428 |
| }, |
| { |
| "epoch": 2.5087719298245617, |
| "grad_norm": 0.16465926005700326, |
| "learning_rate": 3.644251626898048e-06, |
| "loss": 0.6381, |
| "step": 429 |
| }, |
| { |
| "epoch": 2.5146198830409356, |
| "grad_norm": 0.1766218788353798, |
| "learning_rate": 3.6008676789587854e-06, |
| "loss": 0.67, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.52046783625731, |
| "grad_norm": 0.17349720246234343, |
| "learning_rate": 3.557483731019523e-06, |
| "loss": 0.6493, |
| "step": 431 |
| }, |
| { |
| "epoch": 2.526315789473684, |
| "grad_norm": 0.167194985421623, |
| "learning_rate": 3.5140997830802603e-06, |
| "loss": 0.628, |
| "step": 432 |
| }, |
| { |
| "epoch": 2.5321637426900585, |
| "grad_norm": 0.1704752632069036, |
| "learning_rate": 3.470715835140998e-06, |
| "loss": 0.6431, |
| "step": 433 |
| }, |
| { |
| "epoch": 2.538011695906433, |
| "grad_norm": 0.18481707817941734, |
| "learning_rate": 3.4273318872017357e-06, |
| "loss": 0.6715, |
| "step": 434 |
| }, |
| { |
| "epoch": 2.543859649122807, |
| "grad_norm": 0.1953699500403843, |
| "learning_rate": 3.383947939262473e-06, |
| "loss": 0.6845, |
| "step": 435 |
| }, |
| { |
| "epoch": 2.5497076023391814, |
| "grad_norm": 0.16379216515216974, |
| "learning_rate": 3.3405639913232106e-06, |
| "loss": 0.6455, |
| "step": 436 |
| }, |
| { |
| "epoch": 2.5555555555555554, |
| "grad_norm": 0.1980120403081147, |
| "learning_rate": 3.2971800433839487e-06, |
| "loss": 0.6695, |
| "step": 437 |
| }, |
| { |
| "epoch": 2.56140350877193, |
| "grad_norm": 0.16118979174422027, |
| "learning_rate": 3.2537960954446855e-06, |
| "loss": 0.5928, |
| "step": 438 |
| }, |
| { |
| "epoch": 2.5672514619883042, |
| "grad_norm": 0.1657791823109499, |
| "learning_rate": 3.210412147505423e-06, |
| "loss": 0.645, |
| "step": 439 |
| }, |
| { |
| "epoch": 2.573099415204678, |
| "grad_norm": 0.17132915146971192, |
| "learning_rate": 3.1670281995661605e-06, |
| "loss": 0.6847, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.5789473684210527, |
| "grad_norm": 0.16324444549230824, |
| "learning_rate": 3.123644251626898e-06, |
| "loss": 0.6413, |
| "step": 441 |
| }, |
| { |
| "epoch": 2.5847953216374266, |
| "grad_norm": 0.17488238495665867, |
| "learning_rate": 3.0802603036876362e-06, |
| "loss": 0.6321, |
| "step": 442 |
| }, |
| { |
| "epoch": 2.590643274853801, |
| "grad_norm": 0.17634328132329954, |
| "learning_rate": 3.036876355748373e-06, |
| "loss": 0.6784, |
| "step": 443 |
| }, |
| { |
| "epoch": 2.5964912280701755, |
| "grad_norm": 0.17868073636307982, |
| "learning_rate": 2.993492407809111e-06, |
| "loss": 0.678, |
| "step": 444 |
| }, |
| { |
| "epoch": 2.60233918128655, |
| "grad_norm": 0.1632381806494582, |
| "learning_rate": 2.950108459869848e-06, |
| "loss": 0.6266, |
| "step": 445 |
| }, |
| { |
| "epoch": 2.608187134502924, |
| "grad_norm": 0.16547418794872898, |
| "learning_rate": 2.906724511930586e-06, |
| "loss": 0.6328, |
| "step": 446 |
| }, |
| { |
| "epoch": 2.6140350877192984, |
| "grad_norm": 0.17622874984246908, |
| "learning_rate": 2.8633405639913238e-06, |
| "loss": 0.661, |
| "step": 447 |
| }, |
| { |
| "epoch": 2.6198830409356724, |
| "grad_norm": 0.16541694161777937, |
| "learning_rate": 2.819956616052061e-06, |
| "loss": 0.6236, |
| "step": 448 |
| }, |
| { |
| "epoch": 2.625730994152047, |
| "grad_norm": 0.1662936609526993, |
| "learning_rate": 2.7765726681127987e-06, |
| "loss": 0.6159, |
| "step": 449 |
| }, |
| { |
| "epoch": 2.6315789473684212, |
| "grad_norm": 0.16669675160496522, |
| "learning_rate": 2.7331887201735356e-06, |
| "loss": 0.6505, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.6374269005847952, |
| "grad_norm": 0.16549634014330256, |
| "learning_rate": 2.6898047722342737e-06, |
| "loss": 0.6508, |
| "step": 451 |
| }, |
| { |
| "epoch": 2.6432748538011697, |
| "grad_norm": 0.1810240612515184, |
| "learning_rate": 2.6464208242950113e-06, |
| "loss": 0.6705, |
| "step": 452 |
| }, |
| { |
| "epoch": 2.6491228070175437, |
| "grad_norm": 0.17384206262358587, |
| "learning_rate": 2.6030368763557486e-06, |
| "loss": 0.6376, |
| "step": 453 |
| }, |
| { |
| "epoch": 2.654970760233918, |
| "grad_norm": 0.17845392301327417, |
| "learning_rate": 2.5596529284164863e-06, |
| "loss": 0.6726, |
| "step": 454 |
| }, |
| { |
| "epoch": 2.6608187134502925, |
| "grad_norm": 0.1998313342763234, |
| "learning_rate": 2.516268980477224e-06, |
| "loss": 0.7018, |
| "step": 455 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 0.16980658137466279, |
| "learning_rate": 2.472885032537961e-06, |
| "loss": 0.649, |
| "step": 456 |
| }, |
| { |
| "epoch": 2.672514619883041, |
| "grad_norm": 0.18303799471242801, |
| "learning_rate": 2.4295010845986985e-06, |
| "loss": 0.6807, |
| "step": 457 |
| }, |
| { |
| "epoch": 2.678362573099415, |
| "grad_norm": 0.16687064769711984, |
| "learning_rate": 2.386117136659436e-06, |
| "loss": 0.625, |
| "step": 458 |
| }, |
| { |
| "epoch": 2.6842105263157894, |
| "grad_norm": 0.17675345144700674, |
| "learning_rate": 2.342733188720174e-06, |
| "loss": 0.6877, |
| "step": 459 |
| }, |
| { |
| "epoch": 2.690058479532164, |
| "grad_norm": 0.18401049432140446, |
| "learning_rate": 2.299349240780911e-06, |
| "loss": 0.6887, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.6959064327485383, |
| "grad_norm": 0.1744979749607572, |
| "learning_rate": 2.2559652928416487e-06, |
| "loss": 0.6589, |
| "step": 461 |
| }, |
| { |
| "epoch": 2.7017543859649122, |
| "grad_norm": 0.1746641852105471, |
| "learning_rate": 2.2125813449023864e-06, |
| "loss": 0.6495, |
| "step": 462 |
| }, |
| { |
| "epoch": 2.7076023391812867, |
| "grad_norm": 0.16657675516372344, |
| "learning_rate": 2.1691973969631237e-06, |
| "loss": 0.662, |
| "step": 463 |
| }, |
| { |
| "epoch": 2.7134502923976607, |
| "grad_norm": 0.17198446823209654, |
| "learning_rate": 2.1258134490238614e-06, |
| "loss": 0.6732, |
| "step": 464 |
| }, |
| { |
| "epoch": 2.719298245614035, |
| "grad_norm": 0.1666041499402243, |
| "learning_rate": 2.0824295010845986e-06, |
| "loss": 0.6812, |
| "step": 465 |
| }, |
| { |
| "epoch": 2.7251461988304095, |
| "grad_norm": 0.17396505588176064, |
| "learning_rate": 2.0390455531453363e-06, |
| "loss": 0.6591, |
| "step": 466 |
| }, |
| { |
| "epoch": 2.7309941520467835, |
| "grad_norm": 0.17207201652443582, |
| "learning_rate": 1.995661605206074e-06, |
| "loss": 0.6278, |
| "step": 467 |
| }, |
| { |
| "epoch": 2.736842105263158, |
| "grad_norm": 0.16767533054287867, |
| "learning_rate": 1.9522776572668117e-06, |
| "loss": 0.6508, |
| "step": 468 |
| }, |
| { |
| "epoch": 2.742690058479532, |
| "grad_norm": 0.17199489358502026, |
| "learning_rate": 1.908893709327549e-06, |
| "loss": 0.6652, |
| "step": 469 |
| }, |
| { |
| "epoch": 2.7485380116959064, |
| "grad_norm": 0.15742337242113655, |
| "learning_rate": 1.8655097613882864e-06, |
| "loss": 0.6281, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.754385964912281, |
| "grad_norm": 0.16549888305173557, |
| "learning_rate": 1.822125813449024e-06, |
| "loss": 0.6257, |
| "step": 471 |
| }, |
| { |
| "epoch": 2.760233918128655, |
| "grad_norm": 0.17228844722867567, |
| "learning_rate": 1.7787418655097615e-06, |
| "loss": 0.6897, |
| "step": 472 |
| }, |
| { |
| "epoch": 2.7660818713450293, |
| "grad_norm": 0.16656984900009209, |
| "learning_rate": 1.735357917570499e-06, |
| "loss": 0.6576, |
| "step": 473 |
| }, |
| { |
| "epoch": 2.7719298245614032, |
| "grad_norm": 0.1617090427960584, |
| "learning_rate": 1.6919739696312365e-06, |
| "loss": 0.6375, |
| "step": 474 |
| }, |
| { |
| "epoch": 2.7777777777777777, |
| "grad_norm": 0.17066915492008342, |
| "learning_rate": 1.6485900216919743e-06, |
| "loss": 0.6434, |
| "step": 475 |
| }, |
| { |
| "epoch": 2.783625730994152, |
| "grad_norm": 0.17283365217712324, |
| "learning_rate": 1.6052060737527116e-06, |
| "loss": 0.6404, |
| "step": 476 |
| }, |
| { |
| "epoch": 2.7894736842105265, |
| "grad_norm": 0.16377562920106029, |
| "learning_rate": 1.561822125813449e-06, |
| "loss": 0.5996, |
| "step": 477 |
| }, |
| { |
| "epoch": 2.7953216374269005, |
| "grad_norm": 0.16639432488486533, |
| "learning_rate": 1.5184381778741865e-06, |
| "loss": 0.5969, |
| "step": 478 |
| }, |
| { |
| "epoch": 2.801169590643275, |
| "grad_norm": 0.16980646505093647, |
| "learning_rate": 1.475054229934924e-06, |
| "loss": 0.6769, |
| "step": 479 |
| }, |
| { |
| "epoch": 2.807017543859649, |
| "grad_norm": 0.1628222318868079, |
| "learning_rate": 1.4316702819956619e-06, |
| "loss": 0.6508, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.8128654970760234, |
| "grad_norm": 0.18172158119006254, |
| "learning_rate": 1.3882863340563994e-06, |
| "loss": 0.6604, |
| "step": 481 |
| }, |
| { |
| "epoch": 2.818713450292398, |
| "grad_norm": 0.16423487898529526, |
| "learning_rate": 1.3449023861171368e-06, |
| "loss": 0.6228, |
| "step": 482 |
| }, |
| { |
| "epoch": 2.824561403508772, |
| "grad_norm": 0.17478062902651836, |
| "learning_rate": 1.3015184381778743e-06, |
| "loss": 0.6251, |
| "step": 483 |
| }, |
| { |
| "epoch": 2.8304093567251463, |
| "grad_norm": 0.1726032282493946, |
| "learning_rate": 1.258134490238612e-06, |
| "loss": 0.6735, |
| "step": 484 |
| }, |
| { |
| "epoch": 2.8362573099415203, |
| "grad_norm": 0.16790264066853555, |
| "learning_rate": 1.2147505422993492e-06, |
| "loss": 0.6434, |
| "step": 485 |
| }, |
| { |
| "epoch": 2.8421052631578947, |
| "grad_norm": 0.1671571499569638, |
| "learning_rate": 1.171366594360087e-06, |
| "loss": 0.6578, |
| "step": 486 |
| }, |
| { |
| "epoch": 2.847953216374269, |
| "grad_norm": 0.16863160149729373, |
| "learning_rate": 1.1279826464208244e-06, |
| "loss": 0.6242, |
| "step": 487 |
| }, |
| { |
| "epoch": 2.853801169590643, |
| "grad_norm": 0.161190538585518, |
| "learning_rate": 1.0845986984815618e-06, |
| "loss": 0.6342, |
| "step": 488 |
| }, |
| { |
| "epoch": 2.8596491228070176, |
| "grad_norm": 0.16562131765046972, |
| "learning_rate": 1.0412147505422993e-06, |
| "loss": 0.6346, |
| "step": 489 |
| }, |
| { |
| "epoch": 2.8654970760233915, |
| "grad_norm": 0.16478891417223968, |
| "learning_rate": 9.97830802603037e-07, |
| "loss": 0.629, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.871345029239766, |
| "grad_norm": 0.1652066649082407, |
| "learning_rate": 9.544468546637745e-07, |
| "loss": 0.6512, |
| "step": 491 |
| }, |
| { |
| "epoch": 2.8771929824561404, |
| "grad_norm": 0.1808259238987679, |
| "learning_rate": 9.11062906724512e-07, |
| "loss": 0.6501, |
| "step": 492 |
| }, |
| { |
| "epoch": 2.883040935672515, |
| "grad_norm": 0.16595306747518687, |
| "learning_rate": 8.676789587852495e-07, |
| "loss": 0.6187, |
| "step": 493 |
| }, |
| { |
| "epoch": 2.888888888888889, |
| "grad_norm": 0.16577185891507523, |
| "learning_rate": 8.242950108459872e-07, |
| "loss": 0.6608, |
| "step": 494 |
| }, |
| { |
| "epoch": 2.8947368421052633, |
| "grad_norm": 0.17578227817996883, |
| "learning_rate": 7.809110629067245e-07, |
| "loss": 0.6522, |
| "step": 495 |
| }, |
| { |
| "epoch": 2.9005847953216373, |
| "grad_norm": 0.16712846714191626, |
| "learning_rate": 7.37527114967462e-07, |
| "loss": 0.6741, |
| "step": 496 |
| }, |
| { |
| "epoch": 2.9064327485380117, |
| "grad_norm": 0.1695052637928444, |
| "learning_rate": 6.941431670281997e-07, |
| "loss": 0.6697, |
| "step": 497 |
| }, |
| { |
| "epoch": 2.912280701754386, |
| "grad_norm": 0.16523586320140343, |
| "learning_rate": 6.507592190889371e-07, |
| "loss": 0.6359, |
| "step": 498 |
| }, |
| { |
| "epoch": 2.91812865497076, |
| "grad_norm": 0.17250068186561412, |
| "learning_rate": 6.073752711496746e-07, |
| "loss": 0.635, |
| "step": 499 |
| }, |
| { |
| "epoch": 2.9239766081871346, |
| "grad_norm": 0.15377259520433112, |
| "learning_rate": 5.639913232104122e-07, |
| "loss": 0.5981, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.9298245614035086, |
| "grad_norm": 0.1658156010520523, |
| "learning_rate": 5.206073752711497e-07, |
| "loss": 0.6501, |
| "step": 501 |
| }, |
| { |
| "epoch": 2.935672514619883, |
| "grad_norm": 0.16408478791637582, |
| "learning_rate": 4.772234273318872e-07, |
| "loss": 0.6142, |
| "step": 502 |
| }, |
| { |
| "epoch": 2.9415204678362574, |
| "grad_norm": 0.16388275974704705, |
| "learning_rate": 4.3383947939262475e-07, |
| "loss": 0.6752, |
| "step": 503 |
| }, |
| { |
| "epoch": 2.9473684210526314, |
| "grad_norm": 0.16114934023396965, |
| "learning_rate": 3.9045553145336227e-07, |
| "loss": 0.6342, |
| "step": 504 |
| }, |
| { |
| "epoch": 2.953216374269006, |
| "grad_norm": 0.16383179577586124, |
| "learning_rate": 3.4707158351409984e-07, |
| "loss": 0.6357, |
| "step": 505 |
| }, |
| { |
| "epoch": 2.95906432748538, |
| "grad_norm": 0.15975403048273223, |
| "learning_rate": 3.036876355748373e-07, |
| "loss": 0.6188, |
| "step": 506 |
| }, |
| { |
| "epoch": 2.9649122807017543, |
| "grad_norm": 0.1676357353298206, |
| "learning_rate": 2.6030368763557483e-07, |
| "loss": 0.6311, |
| "step": 507 |
| }, |
| { |
| "epoch": 2.9707602339181287, |
| "grad_norm": 0.15959844257029077, |
| "learning_rate": 2.1691973969631237e-07, |
| "loss": 0.6397, |
| "step": 508 |
| }, |
| { |
| "epoch": 2.976608187134503, |
| "grad_norm": 0.1748528110908195, |
| "learning_rate": 1.7353579175704992e-07, |
| "loss": 0.6354, |
| "step": 509 |
| }, |
| { |
| "epoch": 2.982456140350877, |
| "grad_norm": 0.16899094676604337, |
| "learning_rate": 1.3015184381778741e-07, |
| "loss": 0.6553, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.9883040935672516, |
| "grad_norm": 0.18419361328324801, |
| "learning_rate": 8.676789587852496e-08, |
| "loss": 0.6327, |
| "step": 511 |
| }, |
| { |
| "epoch": 2.9941520467836256, |
| "grad_norm": 0.1710576834882864, |
| "learning_rate": 4.338394793926248e-08, |
| "loss": 0.6288, |
| "step": 512 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.1608724141438565, |
| "learning_rate": 0.0, |
| "loss": 0.6311, |
| "step": 513 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 513, |
| "total_flos": 233827146399744.0, |
| "train_loss": 0.7522663490349322, |
| "train_runtime": 28313.6625, |
| "train_samples_per_second": 0.289, |
| "train_steps_per_second": 0.018 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 513, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 150, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 233827146399744.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
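The object above appears to be a Hugging Face Trainer state file: per-step entries in "log_history" (each with "epoch", "grad_norm", "learning_rate", "loss", "step"), followed by a run summary (513 steps over 3 epochs, a learning rate that decays linearly to 0.0 at the final step, mean "train_loss" 0.7523, "train_runtime" about 28314 s). Below is a minimal sketch of how such a file could be inspected; it assumes the JSON has been saved as trainer_state.json (the filename is an assumption, not stated in the log) and uses only the keys visible above.

# Minimal sketch (assumption: the log above is saved as "trainer_state.json";
# that filename is not given in the file itself). Uses only the standard library
# and only keys that appear in the log: "log_history", "loss", "learning_rate",
# "step", "train_loss", "train_runtime".
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step entries carry a "loss"; the final summary entry carries "train_loss" instead.
steps = [e for e in state["log_history"] if "loss" in e]
summary = state["log_history"][-1]

print(f"logged steps        : {len(steps)}")
print(f"first -> last loss  : {steps[0]['loss']} -> {steps[-1]['loss']}")
print(f"final learning rate : {steps[-1]['learning_rate']}")
print(f"mean train loss     : {summary.get('train_loss')}")
print(f"train runtime (s)   : {summary.get('train_runtime')}")

Run against this log, the sketch would report a final step-513 loss of 0.6311, a final learning rate of 0.0, and the Trainer-reported mean train loss of 0.7522663490349322.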