{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 6981, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00014324595330181923, "grad_norm": 52.62998580932617, "learning_rate": 9.523809523809526e-07, "loss": 4.378, "step": 1 }, { "epoch": 0.00028649190660363845, "grad_norm": 73.16095733642578, "learning_rate": 1.9047619047619051e-06, "loss": 4.5535, "step": 2 }, { "epoch": 0.0004297378599054577, "grad_norm": 72.34996032714844, "learning_rate": 2.8571428571428573e-06, "loss": 4.2944, "step": 3 }, { "epoch": 0.0005729838132072769, "grad_norm": 58.35626983642578, "learning_rate": 3.8095238095238102e-06, "loss": 4.7953, "step": 4 }, { "epoch": 0.0007162297665090961, "grad_norm": 87.64541625976562, "learning_rate": 4.7619047619047615e-06, "loss": 4.8454, "step": 5 }, { "epoch": 0.0008594757198109154, "grad_norm": 46.40412521362305, "learning_rate": 5.7142857142857145e-06, "loss": 4.2617, "step": 6 }, { "epoch": 0.0010027216731127346, "grad_norm": 43.070899963378906, "learning_rate": 6.666666666666667e-06, "loss": 3.9821, "step": 7 }, { "epoch": 0.0011459676264145538, "grad_norm": 34.57389831542969, "learning_rate": 7.6190476190476205e-06, "loss": 4.1808, "step": 8 }, { "epoch": 0.001289213579716373, "grad_norm": 29.77797508239746, "learning_rate": 8.571428571428573e-06, "loss": 3.8951, "step": 9 }, { "epoch": 0.0014324595330181922, "grad_norm": 23.34331703186035, "learning_rate": 9.523809523809523e-06, "loss": 3.6618, "step": 10 }, { "epoch": 0.0015757054863200115, "grad_norm": 19.189594268798828, "learning_rate": 1.0476190476190477e-05, "loss": 3.4963, "step": 11 }, { "epoch": 0.0017189514396218307, "grad_norm": 26.155723571777344, "learning_rate": 1.1428571428571429e-05, "loss": 3.6063, "step": 12 }, { "epoch": 0.00186219739292365, "grad_norm": 26.80401039123535, "learning_rate": 1.2380952380952381e-05, "loss": 3.2612, "step": 13 }, { "epoch": 0.0020054433462254693, "grad_norm": 31.526416778564453, "learning_rate": 1.3333333333333333e-05, "loss": 3.5169, "step": 14 }, { "epoch": 0.0021486892995272885, "grad_norm": 12.625049591064453, "learning_rate": 1.4285714285714285e-05, "loss": 2.8682, "step": 15 }, { "epoch": 0.0022919352528291076, "grad_norm": 8.687185287475586, "learning_rate": 1.5238095238095241e-05, "loss": 2.7834, "step": 16 }, { "epoch": 0.002435181206130927, "grad_norm": 6.890831470489502, "learning_rate": 1.6190476190476193e-05, "loss": 2.8426, "step": 17 }, { "epoch": 0.002578427159432746, "grad_norm": 5.462021827697754, "learning_rate": 1.7142857142857145e-05, "loss": 2.6669, "step": 18 }, { "epoch": 0.002721673112734565, "grad_norm": 5.529022216796875, "learning_rate": 1.8095238095238094e-05, "loss": 2.9437, "step": 19 }, { "epoch": 0.0028649190660363843, "grad_norm": 3.6151702404022217, "learning_rate": 1.9047619047619046e-05, "loss": 2.6288, "step": 20 }, { "epoch": 0.0030081650193382035, "grad_norm": 5.00957727432251, "learning_rate": 2e-05, "loss": 2.8314, "step": 21 }, { "epoch": 0.003151410972640023, "grad_norm": 7.0872721672058105, "learning_rate": 2.0952380952380954e-05, "loss": 3.1902, "step": 22 }, { "epoch": 0.0032946569259418423, "grad_norm": 4.451307773590088, "learning_rate": 2.1904761904761906e-05, "loss": 2.7511, "step": 23 }, { "epoch": 0.0034379028792436614, "grad_norm": 4.2892961502075195, "learning_rate": 2.2857142857142858e-05, "loss": 2.5161, "step": 24 }, { "epoch": 0.0035811488325454806, "grad_norm": 5.240790367126465, "learning_rate": 2.380952380952381e-05, "loss": 2.6864, "step": 25 }, { "epoch": 0.0037243947858473, "grad_norm": 4.106722354888916, "learning_rate": 2.4761904761904762e-05, "loss": 2.8796, "step": 26 }, { "epoch": 0.003867640739149119, "grad_norm": 3.803208827972412, "learning_rate": 2.5714285714285714e-05, "loss": 2.4555, "step": 27 }, { "epoch": 0.004010886692450939, "grad_norm": 3.8796379566192627, "learning_rate": 2.6666666666666667e-05, "loss": 2.4737, "step": 28 }, { "epoch": 0.004154132645752757, "grad_norm": 2.8030145168304443, "learning_rate": 2.7619047619047622e-05, "loss": 2.2508, "step": 29 }, { "epoch": 0.004297378599054577, "grad_norm": 2.5193729400634766, "learning_rate": 2.857142857142857e-05, "loss": 2.431, "step": 30 }, { "epoch": 0.004440624552356396, "grad_norm": 9.163617134094238, "learning_rate": 2.9523809523809526e-05, "loss": 2.4488, "step": 31 }, { "epoch": 0.004583870505658215, "grad_norm": 2.1598594188690186, "learning_rate": 3.0476190476190482e-05, "loss": 2.4681, "step": 32 }, { "epoch": 0.004727116458960034, "grad_norm": 1.586427927017212, "learning_rate": 3.142857142857143e-05, "loss": 2.4252, "step": 33 }, { "epoch": 0.004870362412261854, "grad_norm": 1.845141887664795, "learning_rate": 3.2380952380952386e-05, "loss": 2.5847, "step": 34 }, { "epoch": 0.005013608365563673, "grad_norm": 2.1396284103393555, "learning_rate": 3.3333333333333335e-05, "loss": 2.4139, "step": 35 }, { "epoch": 0.005156854318865492, "grad_norm": 1.5211702585220337, "learning_rate": 3.428571428571429e-05, "loss": 2.3666, "step": 36 }, { "epoch": 0.0053001002721673116, "grad_norm": 1.7743985652923584, "learning_rate": 3.523809523809524e-05, "loss": 2.3948, "step": 37 }, { "epoch": 0.00544334622546913, "grad_norm": 1.732538104057312, "learning_rate": 3.619047619047619e-05, "loss": 2.2876, "step": 38 }, { "epoch": 0.00558659217877095, "grad_norm": 1.6113958358764648, "learning_rate": 3.7142857142857143e-05, "loss": 2.4636, "step": 39 }, { "epoch": 0.005729838132072769, "grad_norm": 1.706359624862671, "learning_rate": 3.809523809523809e-05, "loss": 2.3282, "step": 40 }, { "epoch": 0.005873084085374588, "grad_norm": 1.6610397100448608, "learning_rate": 3.904761904761905e-05, "loss": 2.4035, "step": 41 }, { "epoch": 0.006016330038676407, "grad_norm": 1.9866093397140503, "learning_rate": 4e-05, "loss": 2.1891, "step": 42 }, { "epoch": 0.006159575991978227, "grad_norm": 1.3556532859802246, "learning_rate": 4.095238095238095e-05, "loss": 2.4585, "step": 43 }, { "epoch": 0.006302821945280046, "grad_norm": 1.4916921854019165, "learning_rate": 4.190476190476191e-05, "loss": 2.3047, "step": 44 }, { "epoch": 0.006446067898581865, "grad_norm": 1.7516685724258423, "learning_rate": 4.2857142857142856e-05, "loss": 2.3223, "step": 45 }, { "epoch": 0.0065893138518836845, "grad_norm": 1.7386003732681274, "learning_rate": 4.380952380952381e-05, "loss": 2.2594, "step": 46 }, { "epoch": 0.006732559805185503, "grad_norm": 1.6626988649368286, "learning_rate": 4.476190476190477e-05, "loss": 2.1952, "step": 47 }, { "epoch": 0.006875805758487323, "grad_norm": 1.5003330707550049, "learning_rate": 4.5714285714285716e-05, "loss": 2.2867, "step": 48 }, { "epoch": 0.007019051711789142, "grad_norm": 1.6588246822357178, "learning_rate": 4.666666666666667e-05, "loss": 2.1556, "step": 49 }, { "epoch": 0.007162297665090961, "grad_norm": 1.3544559478759766, "learning_rate": 4.761904761904762e-05, "loss": 2.1987, "step": 50 }, { "epoch": 0.00730554361839278, "grad_norm": 1.466995358467102, "learning_rate": 4.8571428571428576e-05, "loss": 2.1098, "step": 51 }, { "epoch": 0.0074487895716946, "grad_norm": 1.5698860883712769, "learning_rate": 4.9523809523809525e-05, "loss": 2.4818, "step": 52 }, { "epoch": 0.007592035524996419, "grad_norm": 1.5944514274597168, "learning_rate": 5.047619047619048e-05, "loss": 2.21, "step": 53 }, { "epoch": 0.007735281478298238, "grad_norm": 1.328236699104309, "learning_rate": 5.142857142857143e-05, "loss": 2.0475, "step": 54 }, { "epoch": 0.007878527431600058, "grad_norm": 1.5962390899658203, "learning_rate": 5.2380952380952384e-05, "loss": 2.2377, "step": 55 }, { "epoch": 0.008021773384901877, "grad_norm": 1.5493494272232056, "learning_rate": 5.333333333333333e-05, "loss": 2.2419, "step": 56 }, { "epoch": 0.008165019338203695, "grad_norm": 1.5521552562713623, "learning_rate": 5.428571428571428e-05, "loss": 2.0745, "step": 57 }, { "epoch": 0.008308265291505515, "grad_norm": 1.398070216178894, "learning_rate": 5.5238095238095244e-05, "loss": 2.2003, "step": 58 }, { "epoch": 0.008451511244807334, "grad_norm": 1.4636003971099854, "learning_rate": 5.619047619047619e-05, "loss": 2.2911, "step": 59 }, { "epoch": 0.008594757198109154, "grad_norm": 1.1128668785095215, "learning_rate": 5.714285714285714e-05, "loss": 2.0765, "step": 60 }, { "epoch": 0.008738003151410973, "grad_norm": 1.301720380783081, "learning_rate": 5.8095238095238104e-05, "loss": 2.1891, "step": 61 }, { "epoch": 0.008881249104712791, "grad_norm": 1.5715515613555908, "learning_rate": 5.904761904761905e-05, "loss": 2.237, "step": 62 }, { "epoch": 0.009024495058014611, "grad_norm": 1.0565502643585205, "learning_rate": 6e-05, "loss": 2.0478, "step": 63 }, { "epoch": 0.00916774101131643, "grad_norm": 1.0669859647750854, "learning_rate": 6.0952380952380964e-05, "loss": 2.2018, "step": 64 }, { "epoch": 0.00931098696461825, "grad_norm": 1.3056137561798096, "learning_rate": 6.19047619047619e-05, "loss": 2.048, "step": 65 }, { "epoch": 0.009454232917920068, "grad_norm": 1.6640822887420654, "learning_rate": 6.285714285714286e-05, "loss": 2.1539, "step": 66 }, { "epoch": 0.009597478871221888, "grad_norm": 1.364961862564087, "learning_rate": 6.38095238095238e-05, "loss": 2.1301, "step": 67 }, { "epoch": 0.009740724824523707, "grad_norm": 1.196677327156067, "learning_rate": 6.476190476190477e-05, "loss": 2.062, "step": 68 }, { "epoch": 0.009883970777825527, "grad_norm": 1.2314097881317139, "learning_rate": 6.571428571428571e-05, "loss": 2.1485, "step": 69 }, { "epoch": 0.010027216731127346, "grad_norm": 0.9541298747062683, "learning_rate": 6.666666666666667e-05, "loss": 2.0575, "step": 70 }, { "epoch": 0.010170462684429164, "grad_norm": 1.3738226890563965, "learning_rate": 6.761904761904763e-05, "loss": 2.2395, "step": 71 }, { "epoch": 0.010313708637730984, "grad_norm": 1.2460862398147583, "learning_rate": 6.857142857142858e-05, "loss": 2.3738, "step": 72 }, { "epoch": 0.010456954591032804, "grad_norm": 1.5750643014907837, "learning_rate": 6.952380952380952e-05, "loss": 2.1665, "step": 73 }, { "epoch": 0.010600200544334623, "grad_norm": 1.3629311323165894, "learning_rate": 7.047619047619048e-05, "loss": 2.0315, "step": 74 }, { "epoch": 0.010743446497636441, "grad_norm": 1.502973198890686, "learning_rate": 7.142857142857143e-05, "loss": 2.0981, "step": 75 }, { "epoch": 0.01088669245093826, "grad_norm": 1.2985895872116089, "learning_rate": 7.238095238095238e-05, "loss": 2.0413, "step": 76 }, { "epoch": 0.01102993840424008, "grad_norm": 1.2793923616409302, "learning_rate": 7.333333333333333e-05, "loss": 2.0376, "step": 77 }, { "epoch": 0.0111731843575419, "grad_norm": 1.0606118440628052, "learning_rate": 7.428571428571429e-05, "loss": 2.1592, "step": 78 }, { "epoch": 0.01131643031084372, "grad_norm": 1.101787805557251, "learning_rate": 7.523809523809524e-05, "loss": 2.2685, "step": 79 }, { "epoch": 0.011459676264145537, "grad_norm": 1.2614856958389282, "learning_rate": 7.619047619047618e-05, "loss": 2.1567, "step": 80 }, { "epoch": 0.011602922217447357, "grad_norm": 1.119423270225525, "learning_rate": 7.714285714285715e-05, "loss": 2.2293, "step": 81 }, { "epoch": 0.011746168170749176, "grad_norm": 1.317784070968628, "learning_rate": 7.80952380952381e-05, "loss": 2.2105, "step": 82 }, { "epoch": 0.011889414124050996, "grad_norm": 1.2064156532287598, "learning_rate": 7.904761904761905e-05, "loss": 2.0107, "step": 83 }, { "epoch": 0.012032660077352814, "grad_norm": 1.4291565418243408, "learning_rate": 8e-05, "loss": 2.2087, "step": 84 }, { "epoch": 0.012175906030654634, "grad_norm": 1.3053336143493652, "learning_rate": 8.095238095238096e-05, "loss": 2.1808, "step": 85 }, { "epoch": 0.012319151983956453, "grad_norm": 1.3130074739456177, "learning_rate": 8.19047619047619e-05, "loss": 2.3055, "step": 86 }, { "epoch": 0.012462397937258273, "grad_norm": 1.3293081521987915, "learning_rate": 8.285714285714287e-05, "loss": 2.1222, "step": 87 }, { "epoch": 0.012605643890560092, "grad_norm": 1.3411742448806763, "learning_rate": 8.380952380952382e-05, "loss": 2.2056, "step": 88 }, { "epoch": 0.01274888984386191, "grad_norm": 1.3213112354278564, "learning_rate": 8.476190476190477e-05, "loss": 2.0332, "step": 89 }, { "epoch": 0.01289213579716373, "grad_norm": 1.166063666343689, "learning_rate": 8.571428571428571e-05, "loss": 2.1501, "step": 90 }, { "epoch": 0.01303538175046555, "grad_norm": 1.291751742362976, "learning_rate": 8.666666666666667e-05, "loss": 2.1473, "step": 91 }, { "epoch": 0.013178627703767369, "grad_norm": 1.4382574558258057, "learning_rate": 8.761904761904762e-05, "loss": 1.9233, "step": 92 }, { "epoch": 0.013321873657069187, "grad_norm": 1.0422803163528442, "learning_rate": 8.857142857142857e-05, "loss": 2.0612, "step": 93 }, { "epoch": 0.013465119610371007, "grad_norm": 1.269909143447876, "learning_rate": 8.952380952380953e-05, "loss": 2.2321, "step": 94 }, { "epoch": 0.013608365563672826, "grad_norm": 1.3586599826812744, "learning_rate": 9.047619047619048e-05, "loss": 1.9371, "step": 95 }, { "epoch": 0.013751611516974646, "grad_norm": 1.4789193868637085, "learning_rate": 9.142857142857143e-05, "loss": 1.9251, "step": 96 }, { "epoch": 0.013894857470276465, "grad_norm": 1.35099458694458, "learning_rate": 9.238095238095239e-05, "loss": 2.2233, "step": 97 }, { "epoch": 0.014038103423578283, "grad_norm": 1.0202860832214355, "learning_rate": 9.333333333333334e-05, "loss": 2.1776, "step": 98 }, { "epoch": 0.014181349376880103, "grad_norm": 1.2178406715393066, "learning_rate": 9.428571428571429e-05, "loss": 2.0692, "step": 99 }, { "epoch": 0.014324595330181922, "grad_norm": 1.655634880065918, "learning_rate": 9.523809523809524e-05, "loss": 2.1217, "step": 100 }, { "epoch": 0.014467841283483742, "grad_norm": 1.3825751543045044, "learning_rate": 9.61904761904762e-05, "loss": 2.0644, "step": 101 }, { "epoch": 0.01461108723678556, "grad_norm": 1.1038153171539307, "learning_rate": 9.714285714285715e-05, "loss": 2.0095, "step": 102 }, { "epoch": 0.01475433319008738, "grad_norm": 1.4572913646697998, "learning_rate": 9.80952380952381e-05, "loss": 2.1681, "step": 103 }, { "epoch": 0.0148975791433892, "grad_norm": 1.0186891555786133, "learning_rate": 9.904761904761905e-05, "loss": 1.9391, "step": 104 }, { "epoch": 0.015040825096691019, "grad_norm": 1.3062372207641602, "learning_rate": 0.0001, "loss": 2.1501, "step": 105 }, { "epoch": 0.015184071049992838, "grad_norm": 1.5498818159103394, "learning_rate": 0.00010095238095238096, "loss": 2.1133, "step": 106 }, { "epoch": 0.015327317003294656, "grad_norm": 1.3852585554122925, "learning_rate": 0.0001019047619047619, "loss": 2.0824, "step": 107 }, { "epoch": 0.015470562956596476, "grad_norm": 1.3924949169158936, "learning_rate": 0.00010285714285714286, "loss": 2.1442, "step": 108 }, { "epoch": 0.015613808909898295, "grad_norm": 1.6720349788665771, "learning_rate": 0.00010380952380952383, "loss": 2.2242, "step": 109 }, { "epoch": 0.015757054863200115, "grad_norm": 1.2025669813156128, "learning_rate": 0.00010476190476190477, "loss": 2.2154, "step": 110 }, { "epoch": 0.015900300816501935, "grad_norm": 1.3543643951416016, "learning_rate": 0.00010571428571428572, "loss": 1.9969, "step": 111 }, { "epoch": 0.016043546769803754, "grad_norm": 1.2689183950424194, "learning_rate": 0.00010666666666666667, "loss": 2.0138, "step": 112 }, { "epoch": 0.016186792723105574, "grad_norm": 1.564605951309204, "learning_rate": 0.00010761904761904762, "loss": 2.1687, "step": 113 }, { "epoch": 0.01633003867640739, "grad_norm": 1.6459993124008179, "learning_rate": 0.00010857142857142856, "loss": 1.9199, "step": 114 }, { "epoch": 0.01647328462970921, "grad_norm": 1.2635573148727417, "learning_rate": 0.00010952380952380953, "loss": 2.0291, "step": 115 }, { "epoch": 0.01661653058301103, "grad_norm": 1.8486382961273193, "learning_rate": 0.00011047619047619049, "loss": 2.0796, "step": 116 }, { "epoch": 0.01675977653631285, "grad_norm": 1.4162074327468872, "learning_rate": 0.00011142857142857144, "loss": 2.0421, "step": 117 }, { "epoch": 0.01690302248961467, "grad_norm": 1.2740123271942139, "learning_rate": 0.00011238095238095239, "loss": 2.0934, "step": 118 }, { "epoch": 0.017046268442916488, "grad_norm": 1.0773913860321045, "learning_rate": 0.00011333333333333334, "loss": 2.2551, "step": 119 }, { "epoch": 0.017189514396218308, "grad_norm": 1.8074616193771362, "learning_rate": 0.00011428571428571428, "loss": 2.178, "step": 120 }, { "epoch": 0.017332760349520127, "grad_norm": 1.403294324874878, "learning_rate": 0.00011523809523809524, "loss": 1.9464, "step": 121 }, { "epoch": 0.017476006302821947, "grad_norm": 1.5251725912094116, "learning_rate": 0.00011619047619047621, "loss": 2.0521, "step": 122 }, { "epoch": 0.017619252256123763, "grad_norm": 1.192023754119873, "learning_rate": 0.00011714285714285715, "loss": 2.089, "step": 123 }, { "epoch": 0.017762498209425583, "grad_norm": 1.5174809694290161, "learning_rate": 0.0001180952380952381, "loss": 2.0408, "step": 124 }, { "epoch": 0.017905744162727402, "grad_norm": 1.5903838872909546, "learning_rate": 0.00011904761904761905, "loss": 2.0947, "step": 125 }, { "epoch": 0.018048990116029222, "grad_norm": 1.828599452972412, "learning_rate": 0.00012, "loss": 2.1239, "step": 126 }, { "epoch": 0.01819223606933104, "grad_norm": 1.2842992544174194, "learning_rate": 0.00012095238095238095, "loss": 2.0186, "step": 127 }, { "epoch": 0.01833548202263286, "grad_norm": 1.359697699546814, "learning_rate": 0.00012190476190476193, "loss": 1.9914, "step": 128 }, { "epoch": 0.01847872797593468, "grad_norm": 1.45980703830719, "learning_rate": 0.00012285714285714287, "loss": 1.977, "step": 129 }, { "epoch": 0.0186219739292365, "grad_norm": 1.476110816001892, "learning_rate": 0.0001238095238095238, "loss": 1.9878, "step": 130 }, { "epoch": 0.01876521988253832, "grad_norm": 1.417382836341858, "learning_rate": 0.00012476190476190478, "loss": 1.9036, "step": 131 }, { "epoch": 0.018908465835840136, "grad_norm": 1.361007809638977, "learning_rate": 0.00012571428571428572, "loss": 2.0625, "step": 132 }, { "epoch": 0.019051711789141956, "grad_norm": 1.3317992687225342, "learning_rate": 0.00012666666666666666, "loss": 2.093, "step": 133 }, { "epoch": 0.019194957742443775, "grad_norm": 1.46342933177948, "learning_rate": 0.0001276190476190476, "loss": 2.0367, "step": 134 }, { "epoch": 0.019338203695745595, "grad_norm": 1.2505223751068115, "learning_rate": 0.00012857142857142858, "loss": 2.2136, "step": 135 }, { "epoch": 0.019481449649047414, "grad_norm": 1.5655434131622314, "learning_rate": 0.00012952380952380954, "loss": 2.0605, "step": 136 }, { "epoch": 0.019624695602349234, "grad_norm": 1.0419528484344482, "learning_rate": 0.0001304761904761905, "loss": 1.9831, "step": 137 }, { "epoch": 0.019767941555651054, "grad_norm": 1.260705828666687, "learning_rate": 0.00013142857142857143, "loss": 1.9628, "step": 138 }, { "epoch": 0.019911187508952873, "grad_norm": 1.676471471786499, "learning_rate": 0.00013238095238095237, "loss": 2.1263, "step": 139 }, { "epoch": 0.020054433462254693, "grad_norm": 1.489518642425537, "learning_rate": 0.00013333333333333334, "loss": 2.1856, "step": 140 }, { "epoch": 0.02019767941555651, "grad_norm": 1.6098390817642212, "learning_rate": 0.00013428571428571428, "loss": 2.071, "step": 141 }, { "epoch": 0.02034092536885833, "grad_norm": 1.4824872016906738, "learning_rate": 0.00013523809523809525, "loss": 1.8687, "step": 142 }, { "epoch": 0.020484171322160148, "grad_norm": 1.339866280555725, "learning_rate": 0.0001361904761904762, "loss": 2.0715, "step": 143 }, { "epoch": 0.020627417275461968, "grad_norm": 1.6016653776168823, "learning_rate": 0.00013714285714285716, "loss": 2.0803, "step": 144 }, { "epoch": 0.020770663228763787, "grad_norm": 1.5500822067260742, "learning_rate": 0.0001380952380952381, "loss": 2.2148, "step": 145 }, { "epoch": 0.020913909182065607, "grad_norm": 1.6780601739883423, "learning_rate": 0.00013904761904761905, "loss": 1.9264, "step": 146 }, { "epoch": 0.021057155135367427, "grad_norm": 1.4226387739181519, "learning_rate": 0.00014, "loss": 2.1784, "step": 147 }, { "epoch": 0.021200401088669246, "grad_norm": 1.3277734518051147, "learning_rate": 0.00014095238095238096, "loss": 2.1087, "step": 148 }, { "epoch": 0.021343647041971066, "grad_norm": 1.3667608499526978, "learning_rate": 0.00014190476190476193, "loss": 2.0414, "step": 149 }, { "epoch": 0.021486892995272882, "grad_norm": 1.5164493322372437, "learning_rate": 0.00014285714285714287, "loss": 2.0465, "step": 150 }, { "epoch": 0.0216301389485747, "grad_norm": 1.1949714422225952, "learning_rate": 0.0001438095238095238, "loss": 2.023, "step": 151 }, { "epoch": 0.02177338490187652, "grad_norm": 1.3489972352981567, "learning_rate": 0.00014476190476190475, "loss": 2.0367, "step": 152 }, { "epoch": 0.02191663085517834, "grad_norm": 1.3608229160308838, "learning_rate": 0.00014571428571428572, "loss": 1.9096, "step": 153 }, { "epoch": 0.02205987680848016, "grad_norm": 1.09320068359375, "learning_rate": 0.00014666666666666666, "loss": 1.9983, "step": 154 }, { "epoch": 0.02220312276178198, "grad_norm": 1.4014626741409302, "learning_rate": 0.00014761904761904763, "loss": 2.0639, "step": 155 }, { "epoch": 0.0223463687150838, "grad_norm": 1.7282015085220337, "learning_rate": 0.00014857142857142857, "loss": 1.9276, "step": 156 }, { "epoch": 0.02248961466838562, "grad_norm": 1.368768572807312, "learning_rate": 0.00014952380952380954, "loss": 1.9422, "step": 157 }, { "epoch": 0.02263286062168744, "grad_norm": 2.4440808296203613, "learning_rate": 0.00015047619047619048, "loss": 1.921, "step": 158 }, { "epoch": 0.022776106574989255, "grad_norm": 1.4210387468338013, "learning_rate": 0.00015142857142857143, "loss": 1.9634, "step": 159 }, { "epoch": 0.022919352528291075, "grad_norm": 1.8992178440093994, "learning_rate": 0.00015238095238095237, "loss": 1.9508, "step": 160 }, { "epoch": 0.023062598481592894, "grad_norm": 1.416013240814209, "learning_rate": 0.00015333333333333334, "loss": 1.9391, "step": 161 }, { "epoch": 0.023205844434894714, "grad_norm": 1.3314744234085083, "learning_rate": 0.0001542857142857143, "loss": 1.9674, "step": 162 }, { "epoch": 0.023349090388196533, "grad_norm": 1.4793882369995117, "learning_rate": 0.00015523809523809525, "loss": 2.1416, "step": 163 }, { "epoch": 0.023492336341498353, "grad_norm": 1.5465744733810425, "learning_rate": 0.0001561904761904762, "loss": 1.9835, "step": 164 }, { "epoch": 0.023635582294800173, "grad_norm": 2.0043904781341553, "learning_rate": 0.00015714285714285716, "loss": 1.9472, "step": 165 }, { "epoch": 0.023778828248101992, "grad_norm": 1.2508498430252075, "learning_rate": 0.0001580952380952381, "loss": 2.0138, "step": 166 }, { "epoch": 0.023922074201403812, "grad_norm": 1.4031628370285034, "learning_rate": 0.00015904761904761904, "loss": 2.0395, "step": 167 }, { "epoch": 0.024065320154705628, "grad_norm": 1.2188282012939453, "learning_rate": 0.00016, "loss": 1.9438, "step": 168 }, { "epoch": 0.024208566108007448, "grad_norm": 1.2908705472946167, "learning_rate": 0.00016095238095238096, "loss": 1.8868, "step": 169 }, { "epoch": 0.024351812061309267, "grad_norm": 1.6482155323028564, "learning_rate": 0.00016190476190476192, "loss": 2.0801, "step": 170 }, { "epoch": 0.024495058014611087, "grad_norm": 1.8175079822540283, "learning_rate": 0.00016285714285714287, "loss": 2.1677, "step": 171 }, { "epoch": 0.024638303967912906, "grad_norm": 1.4252662658691406, "learning_rate": 0.0001638095238095238, "loss": 1.9151, "step": 172 }, { "epoch": 0.024781549921214726, "grad_norm": 1.1491389274597168, "learning_rate": 0.00016476190476190475, "loss": 2.1041, "step": 173 }, { "epoch": 0.024924795874516546, "grad_norm": 1.263916015625, "learning_rate": 0.00016571428571428575, "loss": 1.7433, "step": 174 }, { "epoch": 0.025068041827818365, "grad_norm": 1.422499418258667, "learning_rate": 0.0001666666666666667, "loss": 2.019, "step": 175 }, { "epoch": 0.025211287781120185, "grad_norm": 1.555964708328247, "learning_rate": 0.00016761904761904763, "loss": 1.9613, "step": 176 }, { "epoch": 0.025354533734422, "grad_norm": 1.4103100299835205, "learning_rate": 0.00016857142857142857, "loss": 2.0609, "step": 177 }, { "epoch": 0.02549777968772382, "grad_norm": 1.5142751932144165, "learning_rate": 0.00016952380952380954, "loss": 2.2082, "step": 178 }, { "epoch": 0.02564102564102564, "grad_norm": 1.527642846107483, "learning_rate": 0.00017047619047619048, "loss": 1.8856, "step": 179 }, { "epoch": 0.02578427159432746, "grad_norm": 1.2264988422393799, "learning_rate": 0.00017142857142857143, "loss": 2.0158, "step": 180 }, { "epoch": 0.02592751754762928, "grad_norm": 1.1729098558425903, "learning_rate": 0.0001723809523809524, "loss": 2.1225, "step": 181 }, { "epoch": 0.0260707635009311, "grad_norm": 1.666245698928833, "learning_rate": 0.00017333333333333334, "loss": 2.1028, "step": 182 }, { "epoch": 0.02621400945423292, "grad_norm": 2.012843132019043, "learning_rate": 0.0001742857142857143, "loss": 2.1184, "step": 183 }, { "epoch": 0.026357255407534738, "grad_norm": 1.640502691268921, "learning_rate": 0.00017523809523809525, "loss": 2.0897, "step": 184 }, { "epoch": 0.026500501360836558, "grad_norm": 1.4943275451660156, "learning_rate": 0.0001761904761904762, "loss": 1.9422, "step": 185 }, { "epoch": 0.026643747314138374, "grad_norm": 1.4250091314315796, "learning_rate": 0.00017714285714285713, "loss": 1.9238, "step": 186 }, { "epoch": 0.026786993267440194, "grad_norm": 1.699051856994629, "learning_rate": 0.0001780952380952381, "loss": 2.0062, "step": 187 }, { "epoch": 0.026930239220742013, "grad_norm": 1.597662091255188, "learning_rate": 0.00017904761904761907, "loss": 2.0476, "step": 188 }, { "epoch": 0.027073485174043833, "grad_norm": 1.4776837825775146, "learning_rate": 0.00018, "loss": 2.0264, "step": 189 }, { "epoch": 0.027216731127345652, "grad_norm": 1.5483368635177612, "learning_rate": 0.00018095238095238095, "loss": 2.0707, "step": 190 }, { "epoch": 0.027359977080647472, "grad_norm": 1.2471498250961304, "learning_rate": 0.00018190476190476192, "loss": 1.9984, "step": 191 }, { "epoch": 0.02750322303394929, "grad_norm": 1.325850248336792, "learning_rate": 0.00018285714285714286, "loss": 2.011, "step": 192 }, { "epoch": 0.02764646898725111, "grad_norm": 1.6419668197631836, "learning_rate": 0.0001838095238095238, "loss": 1.8366, "step": 193 }, { "epoch": 0.02778971494055293, "grad_norm": 1.1752102375030518, "learning_rate": 0.00018476190476190478, "loss": 2.0313, "step": 194 }, { "epoch": 0.027932960893854747, "grad_norm": 1.2214974164962769, "learning_rate": 0.00018571428571428572, "loss": 2.0463, "step": 195 }, { "epoch": 0.028076206847156567, "grad_norm": 1.4102656841278076, "learning_rate": 0.0001866666666666667, "loss": 1.9033, "step": 196 }, { "epoch": 0.028219452800458386, "grad_norm": 1.3860039710998535, "learning_rate": 0.00018761904761904763, "loss": 2.1156, "step": 197 }, { "epoch": 0.028362698753760206, "grad_norm": 1.4514634609222412, "learning_rate": 0.00018857142857142857, "loss": 2.0283, "step": 198 }, { "epoch": 0.028505944707062025, "grad_norm": 1.1847095489501953, "learning_rate": 0.0001895238095238095, "loss": 2.0435, "step": 199 }, { "epoch": 0.028649190660363845, "grad_norm": 1.2426539659500122, "learning_rate": 0.00019047619047619048, "loss": 2.0134, "step": 200 }, { "epoch": 0.028792436613665665, "grad_norm": 1.1418706178665161, "learning_rate": 0.00019142857142857145, "loss": 2.0571, "step": 201 }, { "epoch": 0.028935682566967484, "grad_norm": 1.5321160554885864, "learning_rate": 0.0001923809523809524, "loss": 2.1111, "step": 202 }, { "epoch": 0.029078928520269304, "grad_norm": 1.7054376602172852, "learning_rate": 0.00019333333333333333, "loss": 1.9351, "step": 203 }, { "epoch": 0.02922217447357112, "grad_norm": 1.5790408849716187, "learning_rate": 0.0001942857142857143, "loss": 2.0556, "step": 204 }, { "epoch": 0.02936542042687294, "grad_norm": 1.5955266952514648, "learning_rate": 0.00019523809523809525, "loss": 2.1258, "step": 205 }, { "epoch": 0.02950866638017476, "grad_norm": 1.4858895540237427, "learning_rate": 0.0001961904761904762, "loss": 1.9684, "step": 206 }, { "epoch": 0.02965191233347658, "grad_norm": 1.96603524684906, "learning_rate": 0.00019714285714285716, "loss": 1.9494, "step": 207 }, { "epoch": 0.0297951582867784, "grad_norm": 1.5131514072418213, "learning_rate": 0.0001980952380952381, "loss": 2.0232, "step": 208 }, { "epoch": 0.029938404240080218, "grad_norm": 1.564658761024475, "learning_rate": 0.00019904761904761907, "loss": 1.8898, "step": 209 }, { "epoch": 0.030081650193382038, "grad_norm": 1.1527990102767944, "learning_rate": 0.0002, "loss": 1.9758, "step": 210 }, { "epoch": 0.030224896146683857, "grad_norm": 1.4897857904434204, "learning_rate": 0.00019999998923623733, "loss": 1.9385, "step": 211 }, { "epoch": 0.030368142099985677, "grad_norm": 1.3621524572372437, "learning_rate": 0.00019999995694495155, "loss": 1.9114, "step": 212 }, { "epoch": 0.030511388053287496, "grad_norm": 1.5576889514923096, "learning_rate": 0.00019999990312614968, "loss": 1.8958, "step": 213 }, { "epoch": 0.030654634006589312, "grad_norm": 1.1676753759384155, "learning_rate": 0.00019999982777984328, "loss": 1.9371, "step": 214 }, { "epoch": 0.030797879959891132, "grad_norm": 1.2504295110702515, "learning_rate": 0.00019999973090604857, "loss": 2.0819, "step": 215 }, { "epoch": 0.03094112591319295, "grad_norm": 1.1602206230163574, "learning_rate": 0.0001999996125047864, "loss": 1.9313, "step": 216 }, { "epoch": 0.03108437186649477, "grad_norm": 1.1292641162872314, "learning_rate": 0.00019999947257608226, "loss": 1.8298, "step": 217 }, { "epoch": 0.03122761781979659, "grad_norm": 1.5617598295211792, "learning_rate": 0.0001999993111199663, "loss": 1.9535, "step": 218 }, { "epoch": 0.03137086377309841, "grad_norm": 1.4218353033065796, "learning_rate": 0.00019999912813647321, "loss": 2.0776, "step": 219 }, { "epoch": 0.03151410972640023, "grad_norm": 1.4078106880187988, "learning_rate": 0.00019999892362564244, "loss": 1.9998, "step": 220 }, { "epoch": 0.03165735567970205, "grad_norm": 1.036447286605835, "learning_rate": 0.00019999869758751803, "loss": 1.9766, "step": 221 }, { "epoch": 0.03180060163300387, "grad_norm": 1.2503416538238525, "learning_rate": 0.00019999845002214862, "loss": 1.9949, "step": 222 }, { "epoch": 0.03194384758630569, "grad_norm": 1.3780275583267212, "learning_rate": 0.00019999818092958745, "loss": 1.9773, "step": 223 }, { "epoch": 0.03208709353960751, "grad_norm": 1.2220666408538818, "learning_rate": 0.00019999789030989256, "loss": 2.0193, "step": 224 }, { "epoch": 0.03223033949290933, "grad_norm": 1.5099045038223267, "learning_rate": 0.00019999757816312639, "loss": 1.9151, "step": 225 }, { "epoch": 0.03237358544621115, "grad_norm": 1.236126184463501, "learning_rate": 0.0001999972444893562, "loss": 2.0094, "step": 226 }, { "epoch": 0.03251683139951296, "grad_norm": 1.5017805099487305, "learning_rate": 0.00019999688928865387, "loss": 2.0769, "step": 227 }, { "epoch": 0.03266007735281478, "grad_norm": 1.1269341707229614, "learning_rate": 0.00019999651256109578, "loss": 2.1085, "step": 228 }, { "epoch": 0.0328033233061166, "grad_norm": 1.117932915687561, "learning_rate": 0.00019999611430676306, "loss": 2.0881, "step": 229 }, { "epoch": 0.03294656925941842, "grad_norm": 1.7601226568222046, "learning_rate": 0.00019999569452574148, "loss": 1.8408, "step": 230 }, { "epoch": 0.03308981521272024, "grad_norm": 1.252386212348938, "learning_rate": 0.00019999525321812134, "loss": 1.9917, "step": 231 }, { "epoch": 0.03323306116602206, "grad_norm": 1.3065465688705444, "learning_rate": 0.0001999947903839977, "loss": 1.8518, "step": 232 }, { "epoch": 0.03337630711932388, "grad_norm": 1.2100579738616943, "learning_rate": 0.00019999430602347018, "loss": 1.9483, "step": 233 }, { "epoch": 0.0335195530726257, "grad_norm": 1.268904447555542, "learning_rate": 0.00019999380013664304, "loss": 1.9393, "step": 234 }, { "epoch": 0.03366279902592752, "grad_norm": 1.0347895622253418, "learning_rate": 0.0001999932727236252, "loss": 1.9474, "step": 235 }, { "epoch": 0.03380604497922934, "grad_norm": 1.6240085363388062, "learning_rate": 0.00019999272378453019, "loss": 1.9719, "step": 236 }, { "epoch": 0.033949290932531156, "grad_norm": 1.3529040813446045, "learning_rate": 0.0001999921533194762, "loss": 1.8698, "step": 237 }, { "epoch": 0.034092536885832976, "grad_norm": 1.3135417699813843, "learning_rate": 0.00019999156132858597, "loss": 2.0179, "step": 238 }, { "epoch": 0.034235782839134796, "grad_norm": 1.1252350807189941, "learning_rate": 0.000199990947811987, "loss": 1.8703, "step": 239 }, { "epoch": 0.034379028792436615, "grad_norm": 1.3984723091125488, "learning_rate": 0.00019999031276981142, "loss": 2.0266, "step": 240 }, { "epoch": 0.034522274745738435, "grad_norm": 1.2189693450927734, "learning_rate": 0.00019998965620219585, "loss": 2.0681, "step": 241 }, { "epoch": 0.034665520699040255, "grad_norm": 0.9576480388641357, "learning_rate": 0.00019998897810928162, "loss": 1.8396, "step": 242 }, { "epoch": 0.034808766652342074, "grad_norm": 1.2399569749832153, "learning_rate": 0.0001999882784912148, "loss": 2.0687, "step": 243 }, { "epoch": 0.034952012605643894, "grad_norm": 1.4409414529800415, "learning_rate": 0.00019998755734814593, "loss": 1.9458, "step": 244 }, { "epoch": 0.035095258558945706, "grad_norm": 1.0613911151885986, "learning_rate": 0.00019998681468023027, "loss": 1.7906, "step": 245 }, { "epoch": 0.035238504512247526, "grad_norm": 1.1053168773651123, "learning_rate": 0.00019998605048762768, "loss": 1.9425, "step": 246 }, { "epoch": 0.035381750465549346, "grad_norm": 1.332306146621704, "learning_rate": 0.0001999852647705027, "loss": 2.1021, "step": 247 }, { "epoch": 0.035524996418851165, "grad_norm": 1.0343549251556396, "learning_rate": 0.0001999844575290245, "loss": 2.0805, "step": 248 }, { "epoch": 0.035668242372152985, "grad_norm": 1.4516009092330933, "learning_rate": 0.00019998362876336677, "loss": 2.1489, "step": 249 }, { "epoch": 0.035811488325454804, "grad_norm": 1.2583619356155396, "learning_rate": 0.00019998277847370802, "loss": 2.1472, "step": 250 }, { "epoch": 0.035954734278756624, "grad_norm": 1.2077544927597046, "learning_rate": 0.00019998190666023122, "loss": 1.8608, "step": 251 }, { "epoch": 0.036097980232058444, "grad_norm": 1.1571005582809448, "learning_rate": 0.00019998101332312408, "loss": 2.0665, "step": 252 }, { "epoch": 0.03624122618536026, "grad_norm": 1.0337742567062378, "learning_rate": 0.00019998009846257896, "loss": 1.9617, "step": 253 }, { "epoch": 0.03638447213866208, "grad_norm": 1.277536153793335, "learning_rate": 0.00019997916207879275, "loss": 2.0556, "step": 254 }, { "epoch": 0.0365277180919639, "grad_norm": 1.2914535999298096, "learning_rate": 0.00019997820417196703, "loss": 1.9707, "step": 255 }, { "epoch": 0.03667096404526572, "grad_norm": 1.0359686613082886, "learning_rate": 0.00019997722474230804, "loss": 1.8728, "step": 256 }, { "epoch": 0.03681420999856754, "grad_norm": 1.415273666381836, "learning_rate": 0.00019997622379002661, "loss": 2.0969, "step": 257 }, { "epoch": 0.03695745595186936, "grad_norm": 1.3990389108657837, "learning_rate": 0.00019997520131533823, "loss": 1.8875, "step": 258 }, { "epoch": 0.03710070190517118, "grad_norm": 1.585178017616272, "learning_rate": 0.00019997415731846298, "loss": 2.1761, "step": 259 }, { "epoch": 0.037243947858473, "grad_norm": 1.2165923118591309, "learning_rate": 0.0001999730917996257, "loss": 1.8663, "step": 260 }, { "epoch": 0.03738719381177482, "grad_norm": 1.3180073499679565, "learning_rate": 0.00019997200475905565, "loss": 2.1149, "step": 261 }, { "epoch": 0.03753043976507664, "grad_norm": 1.3815861940383911, "learning_rate": 0.00019997089619698693, "loss": 1.8873, "step": 262 }, { "epoch": 0.03767368571837845, "grad_norm": 1.2912083864212036, "learning_rate": 0.00019996976611365812, "loss": 1.9973, "step": 263 }, { "epoch": 0.03781693167168027, "grad_norm": 1.3245762586593628, "learning_rate": 0.00019996861450931257, "loss": 1.8785, "step": 264 }, { "epoch": 0.03796017762498209, "grad_norm": 1.3757787942886353, "learning_rate": 0.00019996744138419813, "loss": 1.9808, "step": 265 }, { "epoch": 0.03810342357828391, "grad_norm": 1.3698952198028564, "learning_rate": 0.00019996624673856737, "loss": 1.8661, "step": 266 }, { "epoch": 0.03824666953158573, "grad_norm": 1.2606749534606934, "learning_rate": 0.00019996503057267745, "loss": 1.9493, "step": 267 }, { "epoch": 0.03838991548488755, "grad_norm": 1.1625889539718628, "learning_rate": 0.00019996379288679026, "loss": 1.9898, "step": 268 }, { "epoch": 0.03853316143818937, "grad_norm": 1.4336519241333008, "learning_rate": 0.00019996253368117212, "loss": 1.9362, "step": 269 }, { "epoch": 0.03867640739149119, "grad_norm": 1.3553346395492554, "learning_rate": 0.00019996125295609421, "loss": 1.9553, "step": 270 }, { "epoch": 0.03881965334479301, "grad_norm": 1.1704996824264526, "learning_rate": 0.0001999599507118322, "loss": 2.0389, "step": 271 }, { "epoch": 0.03896289929809483, "grad_norm": 1.2089797258377075, "learning_rate": 0.00019995862694866635, "loss": 1.9239, "step": 272 }, { "epoch": 0.03910614525139665, "grad_norm": 1.003516435623169, "learning_rate": 0.00019995728166688178, "loss": 1.9979, "step": 273 }, { "epoch": 0.03924939120469847, "grad_norm": 1.2670575380325317, "learning_rate": 0.00019995591486676801, "loss": 1.9217, "step": 274 }, { "epoch": 0.03939263715800029, "grad_norm": 1.3995469808578491, "learning_rate": 0.0001999545265486193, "loss": 1.8742, "step": 275 }, { "epoch": 0.03953588311130211, "grad_norm": 1.4435356855392456, "learning_rate": 0.0001999531167127345, "loss": 1.9226, "step": 276 }, { "epoch": 0.03967912906460393, "grad_norm": 1.2965413331985474, "learning_rate": 0.00019995168535941716, "loss": 1.8605, "step": 277 }, { "epoch": 0.039822375017905746, "grad_norm": 1.2421996593475342, "learning_rate": 0.00019995023248897535, "loss": 1.8853, "step": 278 }, { "epoch": 0.039965620971207566, "grad_norm": 1.2318283319473267, "learning_rate": 0.00019994875810172186, "loss": 1.8703, "step": 279 }, { "epoch": 0.040108866924509386, "grad_norm": 1.2504488229751587, "learning_rate": 0.00019994726219797412, "loss": 2.0319, "step": 280 }, { "epoch": 0.040252112877811205, "grad_norm": 1.4693644046783447, "learning_rate": 0.00019994574477805415, "loss": 1.8048, "step": 281 }, { "epoch": 0.04039535883111302, "grad_norm": 1.4368122816085815, "learning_rate": 0.0001999442058422886, "loss": 1.9194, "step": 282 }, { "epoch": 0.04053860478441484, "grad_norm": 1.974845290184021, "learning_rate": 0.00019994264539100875, "loss": 2.1299, "step": 283 }, { "epoch": 0.04068185073771666, "grad_norm": 1.0318002700805664, "learning_rate": 0.00019994106342455053, "loss": 1.935, "step": 284 }, { "epoch": 0.04082509669101848, "grad_norm": 1.3428254127502441, "learning_rate": 0.00019993945994325454, "loss": 1.9015, "step": 285 }, { "epoch": 0.040968342644320296, "grad_norm": 1.4674798250198364, "learning_rate": 0.00019993783494746592, "loss": 1.8721, "step": 286 }, { "epoch": 0.041111588597622116, "grad_norm": 1.1500144004821777, "learning_rate": 0.00019993618843753454, "loss": 1.884, "step": 287 }, { "epoch": 0.041254834550923936, "grad_norm": 1.508557677268982, "learning_rate": 0.00019993452041381483, "loss": 1.8718, "step": 288 }, { "epoch": 0.041398080504225755, "grad_norm": 1.1736414432525635, "learning_rate": 0.00019993283087666583, "loss": 1.9164, "step": 289 }, { "epoch": 0.041541326457527575, "grad_norm": 1.1913667917251587, "learning_rate": 0.0001999311198264513, "loss": 1.9966, "step": 290 }, { "epoch": 0.041684572410829394, "grad_norm": 1.1760199069976807, "learning_rate": 0.00019992938726353963, "loss": 1.9862, "step": 291 }, { "epoch": 0.041827818364131214, "grad_norm": 1.116284966468811, "learning_rate": 0.0001999276331883037, "loss": 2.1363, "step": 292 }, { "epoch": 0.041971064317433034, "grad_norm": 1.3764779567718506, "learning_rate": 0.0001999258576011212, "loss": 2.0099, "step": 293 }, { "epoch": 0.04211431027073485, "grad_norm": 1.110652208328247, "learning_rate": 0.00019992406050237433, "loss": 1.9664, "step": 294 }, { "epoch": 0.04225755622403667, "grad_norm": 1.2322907447814941, "learning_rate": 0.00019992224189244997, "loss": 1.8815, "step": 295 }, { "epoch": 0.04240080217733849, "grad_norm": 1.3137946128845215, "learning_rate": 0.00019992040177173963, "loss": 1.7849, "step": 296 }, { "epoch": 0.04254404813064031, "grad_norm": 1.1724376678466797, "learning_rate": 0.00019991854014063943, "loss": 2.039, "step": 297 }, { "epoch": 0.04268729408394213, "grad_norm": 1.3877332210540771, "learning_rate": 0.00019991665699955013, "loss": 1.8806, "step": 298 }, { "epoch": 0.04283054003724395, "grad_norm": 1.1249327659606934, "learning_rate": 0.00019991475234887714, "loss": 1.9858, "step": 299 }, { "epoch": 0.042973785990545764, "grad_norm": 1.104592204093933, "learning_rate": 0.0001999128261890305, "loss": 1.8972, "step": 300 }, { "epoch": 0.043117031943847584, "grad_norm": 1.0912657976150513, "learning_rate": 0.00019991087852042479, "loss": 1.8983, "step": 301 }, { "epoch": 0.0432602778971494, "grad_norm": 1.2322807312011719, "learning_rate": 0.00019990890934347937, "loss": 1.823, "step": 302 }, { "epoch": 0.04340352385045122, "grad_norm": 1.2307558059692383, "learning_rate": 0.00019990691865861814, "loss": 1.8042, "step": 303 }, { "epoch": 0.04354676980375304, "grad_norm": 1.3651338815689087, "learning_rate": 0.0001999049064662696, "loss": 1.9489, "step": 304 }, { "epoch": 0.04369001575705486, "grad_norm": 1.2109830379486084, "learning_rate": 0.00019990287276686698, "loss": 1.9445, "step": 305 }, { "epoch": 0.04383326171035668, "grad_norm": 1.2235767841339111, "learning_rate": 0.00019990081756084808, "loss": 2.149, "step": 306 }, { "epoch": 0.0439765076636585, "grad_norm": 1.5291017293930054, "learning_rate": 0.00019989874084865532, "loss": 1.8934, "step": 307 }, { "epoch": 0.04411975361696032, "grad_norm": 1.2952134609222412, "learning_rate": 0.00019989664263073573, "loss": 1.9818, "step": 308 }, { "epoch": 0.04426299957026214, "grad_norm": 1.3951518535614014, "learning_rate": 0.00019989452290754107, "loss": 1.9474, "step": 309 }, { "epoch": 0.04440624552356396, "grad_norm": 1.39193594455719, "learning_rate": 0.00019989238167952765, "loss": 1.8892, "step": 310 }, { "epoch": 0.04454949147686578, "grad_norm": 1.589078664779663, "learning_rate": 0.00019989021894715637, "loss": 2.0554, "step": 311 }, { "epoch": 0.0446927374301676, "grad_norm": 1.2662962675094604, "learning_rate": 0.00019988803471089286, "loss": 2.0473, "step": 312 }, { "epoch": 0.04483598338346942, "grad_norm": 1.1959353685379028, "learning_rate": 0.00019988582897120734, "loss": 1.9803, "step": 313 }, { "epoch": 0.04497922933677124, "grad_norm": 1.4584497213363647, "learning_rate": 0.0001998836017285746, "loss": 1.9695, "step": 314 }, { "epoch": 0.04512247529007306, "grad_norm": 1.480107069015503, "learning_rate": 0.00019988135298347416, "loss": 1.9043, "step": 315 }, { "epoch": 0.04526572124337488, "grad_norm": 1.7253074645996094, "learning_rate": 0.00019987908273639011, "loss": 1.7338, "step": 316 }, { "epoch": 0.0454089671966767, "grad_norm": 1.3314229249954224, "learning_rate": 0.00019987679098781115, "loss": 2.0523, "step": 317 }, { "epoch": 0.04555221314997851, "grad_norm": 1.2367300987243652, "learning_rate": 0.00019987447773823068, "loss": 1.9092, "step": 318 }, { "epoch": 0.04569545910328033, "grad_norm": 1.3592756986618042, "learning_rate": 0.00019987214298814666, "loss": 1.8355, "step": 319 }, { "epoch": 0.04583870505658215, "grad_norm": 1.755040168762207, "learning_rate": 0.00019986978673806172, "loss": 1.9794, "step": 320 }, { "epoch": 0.04598195100988397, "grad_norm": 1.7361351251602173, "learning_rate": 0.00019986740898848306, "loss": 1.9147, "step": 321 }, { "epoch": 0.04612519696318579, "grad_norm": 1.2127351760864258, "learning_rate": 0.0001998650097399226, "loss": 1.9654, "step": 322 }, { "epoch": 0.04626844291648761, "grad_norm": 1.4929267168045044, "learning_rate": 0.0001998625889928968, "loss": 1.8369, "step": 323 }, { "epoch": 0.04641168886978943, "grad_norm": 1.6062040328979492, "learning_rate": 0.00019986014674792683, "loss": 2.1023, "step": 324 }, { "epoch": 0.04655493482309125, "grad_norm": 1.0715123414993286, "learning_rate": 0.0001998576830055384, "loss": 1.9211, "step": 325 }, { "epoch": 0.04669818077639307, "grad_norm": 1.1279674768447876, "learning_rate": 0.0001998551977662619, "loss": 1.7204, "step": 326 }, { "epoch": 0.046841426729694886, "grad_norm": 1.1469101905822754, "learning_rate": 0.00019985269103063238, "loss": 1.82, "step": 327 }, { "epoch": 0.046984672682996706, "grad_norm": 1.3553428649902344, "learning_rate": 0.00019985016279918942, "loss": 1.9313, "step": 328 }, { "epoch": 0.047127918636298526, "grad_norm": 1.42116117477417, "learning_rate": 0.00019984761307247735, "loss": 1.8604, "step": 329 }, { "epoch": 0.047271164589600345, "grad_norm": 1.1581860780715942, "learning_rate": 0.00019984504185104502, "loss": 1.7452, "step": 330 }, { "epoch": 0.047414410542902165, "grad_norm": 1.3027849197387695, "learning_rate": 0.00019984244913544597, "loss": 1.9309, "step": 331 }, { "epoch": 0.047557656496203984, "grad_norm": 1.2336416244506836, "learning_rate": 0.00019983983492623833, "loss": 1.9478, "step": 332 }, { "epoch": 0.047700902449505804, "grad_norm": 1.3841291666030884, "learning_rate": 0.00019983719922398488, "loss": 1.977, "step": 333 }, { "epoch": 0.047844148402807624, "grad_norm": 1.1200348138809204, "learning_rate": 0.00019983454202925302, "loss": 2.029, "step": 334 }, { "epoch": 0.04798739435610944, "grad_norm": 0.9795233011245728, "learning_rate": 0.0001998318633426148, "loss": 1.9426, "step": 335 }, { "epoch": 0.048130640309411256, "grad_norm": 1.3202542066574097, "learning_rate": 0.0001998291631646468, "loss": 1.9019, "step": 336 }, { "epoch": 0.048273886262713075, "grad_norm": 1.3691173791885376, "learning_rate": 0.00019982644149593043, "loss": 1.9393, "step": 337 }, { "epoch": 0.048417132216014895, "grad_norm": 1.2442659139633179, "learning_rate": 0.0001998236983370515, "loss": 1.8201, "step": 338 }, { "epoch": 0.048560378169316715, "grad_norm": 1.8122320175170898, "learning_rate": 0.00019982093368860055, "loss": 1.9996, "step": 339 }, { "epoch": 0.048703624122618534, "grad_norm": 1.0892632007598877, "learning_rate": 0.0001998181475511728, "loss": 1.8105, "step": 340 }, { "epoch": 0.048846870075920354, "grad_norm": 1.5804370641708374, "learning_rate": 0.000199815339925368, "loss": 2.0603, "step": 341 }, { "epoch": 0.048990116029222174, "grad_norm": 1.505051612854004, "learning_rate": 0.00019981251081179052, "loss": 1.919, "step": 342 }, { "epoch": 0.04913336198252399, "grad_norm": 1.4446977376937866, "learning_rate": 0.00019980966021104947, "loss": 1.8723, "step": 343 }, { "epoch": 0.04927660793582581, "grad_norm": 1.1602543592453003, "learning_rate": 0.00019980678812375847, "loss": 1.9026, "step": 344 }, { "epoch": 0.04941985388912763, "grad_norm": 1.362926959991455, "learning_rate": 0.00019980389455053583, "loss": 1.9745, "step": 345 }, { "epoch": 0.04956309984242945, "grad_norm": 1.3263410329818726, "learning_rate": 0.00019980097949200444, "loss": 1.7165, "step": 346 }, { "epoch": 0.04970634579573127, "grad_norm": 1.3735880851745605, "learning_rate": 0.00019979804294879188, "loss": 2.0095, "step": 347 }, { "epoch": 0.04984959174903309, "grad_norm": 1.2267096042633057, "learning_rate": 0.00019979508492153026, "loss": 1.8871, "step": 348 }, { "epoch": 0.04999283770233491, "grad_norm": 1.0966641902923584, "learning_rate": 0.00019979210541085644, "loss": 1.925, "step": 349 }, { "epoch": 0.05013608365563673, "grad_norm": 1.2373641729354858, "learning_rate": 0.00019978910441741175, "loss": 2.1047, "step": 350 }, { "epoch": 0.05027932960893855, "grad_norm": 1.0201295614242554, "learning_rate": 0.00019978608194184228, "loss": 1.9623, "step": 351 }, { "epoch": 0.05042257556224037, "grad_norm": 1.0486125946044922, "learning_rate": 0.00019978303798479875, "loss": 1.9244, "step": 352 }, { "epoch": 0.05056582151554219, "grad_norm": 1.224714994430542, "learning_rate": 0.00019977997254693632, "loss": 1.8478, "step": 353 }, { "epoch": 0.050709067468844, "grad_norm": 1.1136339902877808, "learning_rate": 0.00019977688562891502, "loss": 1.9839, "step": 354 }, { "epoch": 0.05085231342214582, "grad_norm": 1.1282410621643066, "learning_rate": 0.00019977377723139932, "loss": 2.1154, "step": 355 }, { "epoch": 0.05099555937544764, "grad_norm": 1.1078453063964844, "learning_rate": 0.00019977064735505836, "loss": 1.9597, "step": 356 }, { "epoch": 0.05113880532874946, "grad_norm": 1.3309283256530762, "learning_rate": 0.00019976749600056603, "loss": 1.851, "step": 357 }, { "epoch": 0.05128205128205128, "grad_norm": 1.3199007511138916, "learning_rate": 0.00019976432316860067, "loss": 1.9931, "step": 358 }, { "epoch": 0.0514252972353531, "grad_norm": 1.443095088005066, "learning_rate": 0.00019976112885984525, "loss": 2.0769, "step": 359 }, { "epoch": 0.05156854318865492, "grad_norm": 1.2029110193252563, "learning_rate": 0.00019975791307498756, "loss": 1.7789, "step": 360 }, { "epoch": 0.05171178914195674, "grad_norm": 1.3783962726593018, "learning_rate": 0.0001997546758147198, "loss": 2.1912, "step": 361 }, { "epoch": 0.05185503509525856, "grad_norm": 1.3991073369979858, "learning_rate": 0.00019975141707973882, "loss": 1.8083, "step": 362 }, { "epoch": 0.05199828104856038, "grad_norm": 1.2664425373077393, "learning_rate": 0.00019974813687074626, "loss": 1.9279, "step": 363 }, { "epoch": 0.0521415270018622, "grad_norm": 1.3429018259048462, "learning_rate": 0.0001997448351884482, "loss": 1.8489, "step": 364 }, { "epoch": 0.05228477295516402, "grad_norm": 1.5050594806671143, "learning_rate": 0.00019974151203355545, "loss": 2.214, "step": 365 }, { "epoch": 0.05242801890846584, "grad_norm": 1.1761552095413208, "learning_rate": 0.00019973816740678335, "loss": 1.814, "step": 366 }, { "epoch": 0.05257126486176766, "grad_norm": 1.0740997791290283, "learning_rate": 0.00019973480130885195, "loss": 1.9163, "step": 367 }, { "epoch": 0.052714510815069476, "grad_norm": 1.1985816955566406, "learning_rate": 0.00019973141374048588, "loss": 1.9949, "step": 368 }, { "epoch": 0.052857756768371296, "grad_norm": 1.2558863162994385, "learning_rate": 0.0001997280047024144, "loss": 1.7374, "step": 369 }, { "epoch": 0.053001002721673116, "grad_norm": 1.0561991930007935, "learning_rate": 0.00019972457419537143, "loss": 1.8152, "step": 370 }, { "epoch": 0.053144248674974935, "grad_norm": 1.1805089712142944, "learning_rate": 0.0001997211222200954, "loss": 1.859, "step": 371 }, { "epoch": 0.05328749462827675, "grad_norm": 1.220906138420105, "learning_rate": 0.0001997176487773295, "loss": 1.8891, "step": 372 }, { "epoch": 0.05343074058157857, "grad_norm": 1.2898125648498535, "learning_rate": 0.0001997141538678214, "loss": 2.0135, "step": 373 }, { "epoch": 0.05357398653488039, "grad_norm": 1.3417600393295288, "learning_rate": 0.00019971063749232352, "loss": 1.7994, "step": 374 }, { "epoch": 0.05371723248818221, "grad_norm": 1.2672240734100342, "learning_rate": 0.0001997070996515929, "loss": 2.0835, "step": 375 }, { "epoch": 0.053860478441484026, "grad_norm": 1.6356720924377441, "learning_rate": 0.00019970354034639103, "loss": 1.8802, "step": 376 }, { "epoch": 0.054003724394785846, "grad_norm": 1.3964208364486694, "learning_rate": 0.00019969995957748422, "loss": 1.9958, "step": 377 }, { "epoch": 0.054146970348087665, "grad_norm": 1.1695609092712402, "learning_rate": 0.0001996963573456433, "loss": 1.9214, "step": 378 }, { "epoch": 0.054290216301389485, "grad_norm": 1.5725616216659546, "learning_rate": 0.0001996927336516438, "loss": 2.0466, "step": 379 }, { "epoch": 0.054433462254691305, "grad_norm": 1.2054401636123657, "learning_rate": 0.00019968908849626567, "loss": 2.0911, "step": 380 }, { "epoch": 0.054576708207993124, "grad_norm": 1.2492305040359497, "learning_rate": 0.00019968542188029376, "loss": 1.9562, "step": 381 }, { "epoch": 0.054719954161294944, "grad_norm": 1.084065556526184, "learning_rate": 0.00019968173380451734, "loss": 1.8762, "step": 382 }, { "epoch": 0.054863200114596763, "grad_norm": 1.216033935546875, "learning_rate": 0.00019967802426973039, "loss": 1.7642, "step": 383 }, { "epoch": 0.05500644606789858, "grad_norm": 1.237306833267212, "learning_rate": 0.00019967429327673143, "loss": 2.0107, "step": 384 }, { "epoch": 0.0551496920212004, "grad_norm": 1.070798397064209, "learning_rate": 0.0001996705408263237, "loss": 1.8986, "step": 385 }, { "epoch": 0.05529293797450222, "grad_norm": 1.2178452014923096, "learning_rate": 0.00019966676691931498, "loss": 2.0638, "step": 386 }, { "epoch": 0.05543618392780404, "grad_norm": 1.8245280981063843, "learning_rate": 0.00019966297155651775, "loss": 2.0658, "step": 387 }, { "epoch": 0.05557942988110586, "grad_norm": 1.1085350513458252, "learning_rate": 0.00019965915473874898, "loss": 1.9441, "step": 388 }, { "epoch": 0.05572267583440768, "grad_norm": 1.253321886062622, "learning_rate": 0.0001996553164668304, "loss": 1.9066, "step": 389 }, { "epoch": 0.055865921787709494, "grad_norm": 1.1831797361373901, "learning_rate": 0.00019965145674158825, "loss": 1.9374, "step": 390 }, { "epoch": 0.05600916774101131, "grad_norm": 1.2637805938720703, "learning_rate": 0.00019964757556385348, "loss": 2.1155, "step": 391 }, { "epoch": 0.05615241369431313, "grad_norm": 1.1922460794448853, "learning_rate": 0.00019964367293446154, "loss": 2.0441, "step": 392 }, { "epoch": 0.05629565964761495, "grad_norm": 1.1540061235427856, "learning_rate": 0.00019963974885425266, "loss": 1.9592, "step": 393 }, { "epoch": 0.05643890560091677, "grad_norm": 1.3633723258972168, "learning_rate": 0.00019963580332407155, "loss": 1.8697, "step": 394 }, { "epoch": 0.05658215155421859, "grad_norm": 1.4259278774261475, "learning_rate": 0.00019963183634476756, "loss": 1.7931, "step": 395 }, { "epoch": 0.05672539750752041, "grad_norm": 1.4136995077133179, "learning_rate": 0.00019962784791719473, "loss": 1.9476, "step": 396 }, { "epoch": 0.05686864346082223, "grad_norm": 1.2864242792129517, "learning_rate": 0.00019962383804221164, "loss": 1.7351, "step": 397 }, { "epoch": 0.05701188941412405, "grad_norm": 1.0674023628234863, "learning_rate": 0.00019961980672068155, "loss": 1.9391, "step": 398 }, { "epoch": 0.05715513536742587, "grad_norm": 1.445884346961975, "learning_rate": 0.00019961575395347224, "loss": 1.8515, "step": 399 }, { "epoch": 0.05729838132072769, "grad_norm": 1.2526741027832031, "learning_rate": 0.00019961167974145625, "loss": 2.0766, "step": 400 }, { "epoch": 0.05744162727402951, "grad_norm": 1.2238144874572754, "learning_rate": 0.0001996075840855106, "loss": 1.785, "step": 401 }, { "epoch": 0.05758487322733133, "grad_norm": 1.074501395225525, "learning_rate": 0.000199603466986517, "loss": 1.7581, "step": 402 }, { "epoch": 0.05772811918063315, "grad_norm": 1.2030830383300781, "learning_rate": 0.00019959932844536177, "loss": 1.8801, "step": 403 }, { "epoch": 0.05787136513393497, "grad_norm": 1.5921425819396973, "learning_rate": 0.0001995951684629358, "loss": 1.7772, "step": 404 }, { "epoch": 0.05801461108723679, "grad_norm": 1.1489605903625488, "learning_rate": 0.0001995909870401347, "loss": 1.8793, "step": 405 }, { "epoch": 0.05815785704053861, "grad_norm": 1.4695888757705688, "learning_rate": 0.00019958678417785858, "loss": 1.7537, "step": 406 }, { "epoch": 0.05830110299384043, "grad_norm": 1.2490322589874268, "learning_rate": 0.00019958255987701217, "loss": 1.8742, "step": 407 }, { "epoch": 0.05844434894714224, "grad_norm": 1.002428412437439, "learning_rate": 0.00019957831413850493, "loss": 1.919, "step": 408 }, { "epoch": 0.05858759490044406, "grad_norm": 1.371732473373413, "learning_rate": 0.00019957404696325086, "loss": 1.8677, "step": 409 }, { "epoch": 0.05873084085374588, "grad_norm": 1.3012187480926514, "learning_rate": 0.00019956975835216852, "loss": 1.8269, "step": 410 }, { "epoch": 0.0588740868070477, "grad_norm": 1.0814388990402222, "learning_rate": 0.0001995654483061812, "loss": 1.994, "step": 411 }, { "epoch": 0.05901733276034952, "grad_norm": 1.2696882486343384, "learning_rate": 0.00019956111682621673, "loss": 1.9509, "step": 412 }, { "epoch": 0.05916057871365134, "grad_norm": 1.4992769956588745, "learning_rate": 0.00019955676391320752, "loss": 1.7334, "step": 413 }, { "epoch": 0.05930382466695316, "grad_norm": 1.1066880226135254, "learning_rate": 0.00019955238956809073, "loss": 2.0837, "step": 414 }, { "epoch": 0.05944707062025498, "grad_norm": 0.9836655855178833, "learning_rate": 0.00019954799379180797, "loss": 1.9481, "step": 415 }, { "epoch": 0.0595903165735568, "grad_norm": 1.1411279439926147, "learning_rate": 0.0001995435765853056, "loss": 1.9364, "step": 416 }, { "epoch": 0.059733562526858616, "grad_norm": 1.1237086057662964, "learning_rate": 0.00019953913794953451, "loss": 1.9559, "step": 417 }, { "epoch": 0.059876808480160436, "grad_norm": 1.1633782386779785, "learning_rate": 0.0001995346778854502, "loss": 1.6833, "step": 418 }, { "epoch": 0.060020054433462255, "grad_norm": 1.5859320163726807, "learning_rate": 0.00019953019639401288, "loss": 1.8446, "step": 419 }, { "epoch": 0.060163300386764075, "grad_norm": 1.499493956565857, "learning_rate": 0.00019952569347618727, "loss": 1.9842, "step": 420 }, { "epoch": 0.060306546340065895, "grad_norm": 1.256356120109558, "learning_rate": 0.0001995211691329427, "loss": 1.8953, "step": 421 }, { "epoch": 0.060449792293367714, "grad_norm": 1.6246408224105835, "learning_rate": 0.00019951662336525322, "loss": 1.9688, "step": 422 }, { "epoch": 0.060593038246669534, "grad_norm": 1.554673433303833, "learning_rate": 0.00019951205617409735, "loss": 1.9671, "step": 423 }, { "epoch": 0.060736284199971353, "grad_norm": 1.3998842239379883, "learning_rate": 0.00019950746756045834, "loss": 2.148, "step": 424 }, { "epoch": 0.06087953015327317, "grad_norm": 1.192581295967102, "learning_rate": 0.00019950285752532397, "loss": 2.0603, "step": 425 }, { "epoch": 0.06102277610657499, "grad_norm": 1.3526045083999634, "learning_rate": 0.00019949822606968673, "loss": 2.0272, "step": 426 }, { "epoch": 0.061166022059876805, "grad_norm": 1.4473577737808228, "learning_rate": 0.0001994935731945436, "loss": 1.8315, "step": 427 }, { "epoch": 0.061309268013178625, "grad_norm": 1.125598430633545, "learning_rate": 0.00019948889890089623, "loss": 2.0276, "step": 428 }, { "epoch": 0.061452513966480445, "grad_norm": 1.403790831565857, "learning_rate": 0.00019948420318975093, "loss": 2.0805, "step": 429 }, { "epoch": 0.061595759919782264, "grad_norm": 1.4935063123703003, "learning_rate": 0.0001994794860621185, "loss": 1.9328, "step": 430 }, { "epoch": 0.061739005873084084, "grad_norm": 1.2201225757598877, "learning_rate": 0.00019947474751901448, "loss": 1.8631, "step": 431 }, { "epoch": 0.0618822518263859, "grad_norm": 1.3769631385803223, "learning_rate": 0.0001994699875614589, "loss": 2.1091, "step": 432 }, { "epoch": 0.06202549777968772, "grad_norm": 1.2667357921600342, "learning_rate": 0.00019946520619047652, "loss": 1.8435, "step": 433 }, { "epoch": 0.06216874373298954, "grad_norm": 1.0790140628814697, "learning_rate": 0.00019946040340709665, "loss": 1.9343, "step": 434 }, { "epoch": 0.06231198968629136, "grad_norm": 1.2820956707000732, "learning_rate": 0.00019945557921235318, "loss": 1.9031, "step": 435 }, { "epoch": 0.06245523563959318, "grad_norm": 1.2308553457260132, "learning_rate": 0.00019945073360728462, "loss": 1.947, "step": 436 }, { "epoch": 0.062598481592895, "grad_norm": 1.1657029390335083, "learning_rate": 0.00019944586659293416, "loss": 1.8207, "step": 437 }, { "epoch": 0.06274172754619682, "grad_norm": 1.113720417022705, "learning_rate": 0.00019944097817034952, "loss": 1.9612, "step": 438 }, { "epoch": 0.06288497349949863, "grad_norm": 1.4692860841751099, "learning_rate": 0.00019943606834058305, "loss": 1.9536, "step": 439 }, { "epoch": 0.06302821945280046, "grad_norm": 1.2908706665039062, "learning_rate": 0.00019943113710469177, "loss": 1.7887, "step": 440 }, { "epoch": 0.06317146540610227, "grad_norm": 1.5681798458099365, "learning_rate": 0.00019942618446373717, "loss": 1.8364, "step": 441 }, { "epoch": 0.0633147113594041, "grad_norm": 1.4053391218185425, "learning_rate": 0.0001994212104187855, "loss": 1.998, "step": 442 }, { "epoch": 0.06345795731270591, "grad_norm": 1.3299514055252075, "learning_rate": 0.00019941621497090748, "loss": 1.7609, "step": 443 }, { "epoch": 0.06360120326600774, "grad_norm": 1.3105353116989136, "learning_rate": 0.0001994111981211786, "loss": 1.8204, "step": 444 }, { "epoch": 0.06374444921930955, "grad_norm": 1.3539772033691406, "learning_rate": 0.00019940615987067877, "loss": 2.0658, "step": 445 }, { "epoch": 0.06388769517261138, "grad_norm": 1.2422757148742676, "learning_rate": 0.00019940110022049264, "loss": 2.0037, "step": 446 }, { "epoch": 0.06403094112591319, "grad_norm": 1.1371114253997803, "learning_rate": 0.00019939601917170943, "loss": 1.8564, "step": 447 }, { "epoch": 0.06417418707921502, "grad_norm": 1.4853485822677612, "learning_rate": 0.00019939091672542295, "loss": 2.0655, "step": 448 }, { "epoch": 0.06431743303251683, "grad_norm": 1.2365187406539917, "learning_rate": 0.00019938579288273167, "loss": 1.9946, "step": 449 }, { "epoch": 0.06446067898581866, "grad_norm": 1.442976713180542, "learning_rate": 0.0001993806476447386, "loss": 1.92, "step": 450 }, { "epoch": 0.06460392493912047, "grad_norm": 1.5670967102050781, "learning_rate": 0.00019937548101255139, "loss": 1.8682, "step": 451 }, { "epoch": 0.0647471708924223, "grad_norm": 1.5802968740463257, "learning_rate": 0.00019937029298728226, "loss": 1.8258, "step": 452 }, { "epoch": 0.06489041684572411, "grad_norm": 1.2011916637420654, "learning_rate": 0.00019936508357004806, "loss": 1.7282, "step": 453 }, { "epoch": 0.06503366279902592, "grad_norm": 1.4225175380706787, "learning_rate": 0.0001993598527619703, "loss": 1.8073, "step": 454 }, { "epoch": 0.06517690875232775, "grad_norm": 1.2416908740997314, "learning_rate": 0.000199354600564175, "loss": 1.7272, "step": 455 }, { "epoch": 0.06532015470562956, "grad_norm": 1.1273763179779053, "learning_rate": 0.00019934932697779288, "loss": 1.8676, "step": 456 }, { "epoch": 0.06546340065893139, "grad_norm": 1.3882946968078613, "learning_rate": 0.00019934403200395914, "loss": 2.0666, "step": 457 }, { "epoch": 0.0656066466122332, "grad_norm": 1.3472100496292114, "learning_rate": 0.0001993387156438137, "loss": 1.8852, "step": 458 }, { "epoch": 0.06574989256553503, "grad_norm": 1.3469574451446533, "learning_rate": 0.000199333377898501, "loss": 1.6828, "step": 459 }, { "epoch": 0.06589313851883684, "grad_norm": 1.3398144245147705, "learning_rate": 0.00019932801876917022, "loss": 1.9105, "step": 460 }, { "epoch": 0.06603638447213867, "grad_norm": 1.6499736309051514, "learning_rate": 0.00019932263825697498, "loss": 1.899, "step": 461 }, { "epoch": 0.06617963042544048, "grad_norm": 1.3724581003189087, "learning_rate": 0.00019931723636307353, "loss": 1.9462, "step": 462 }, { "epoch": 0.0663228763787423, "grad_norm": 1.4621167182922363, "learning_rate": 0.00019931181308862885, "loss": 2.0525, "step": 463 }, { "epoch": 0.06646612233204412, "grad_norm": 1.922086477279663, "learning_rate": 0.00019930636843480838, "loss": 1.74, "step": 464 }, { "epoch": 0.06660936828534594, "grad_norm": 1.217824101448059, "learning_rate": 0.00019930090240278422, "loss": 1.912, "step": 465 }, { "epoch": 0.06675261423864776, "grad_norm": 1.247957706451416, "learning_rate": 0.0001992954149937331, "loss": 1.8527, "step": 466 }, { "epoch": 0.06689586019194958, "grad_norm": 1.1887818574905396, "learning_rate": 0.00019928990620883633, "loss": 1.8, "step": 467 }, { "epoch": 0.0670391061452514, "grad_norm": 1.1378886699676514, "learning_rate": 0.0001992843760492798, "loss": 1.9488, "step": 468 }, { "epoch": 0.06718235209855322, "grad_norm": 1.30185866355896, "learning_rate": 0.00019927882451625402, "loss": 1.9737, "step": 469 }, { "epoch": 0.06732559805185503, "grad_norm": 1.2080559730529785, "learning_rate": 0.00019927325161095408, "loss": 1.9244, "step": 470 }, { "epoch": 0.06746884400515686, "grad_norm": 1.2855666875839233, "learning_rate": 0.0001992676573345797, "loss": 2.0461, "step": 471 }, { "epoch": 0.06761208995845867, "grad_norm": 1.2975670099258423, "learning_rate": 0.00019926204168833515, "loss": 1.8016, "step": 472 }, { "epoch": 0.06775533591176049, "grad_norm": 1.0729694366455078, "learning_rate": 0.0001992564046734294, "loss": 1.822, "step": 473 }, { "epoch": 0.06789858186506231, "grad_norm": 1.769511342048645, "learning_rate": 0.000199250746291076, "loss": 1.9553, "step": 474 }, { "epoch": 0.06804182781836413, "grad_norm": 1.524078369140625, "learning_rate": 0.00019924506654249292, "loss": 1.7481, "step": 475 }, { "epoch": 0.06818507377166595, "grad_norm": 1.4613440036773682, "learning_rate": 0.00019923936542890297, "loss": 1.8721, "step": 476 }, { "epoch": 0.06832831972496776, "grad_norm": 1.281390905380249, "learning_rate": 0.00019923364295153343, "loss": 1.9589, "step": 477 }, { "epoch": 0.06847156567826959, "grad_norm": 1.8698230981826782, "learning_rate": 0.00019922789911161622, "loss": 1.8523, "step": 478 }, { "epoch": 0.0686148116315714, "grad_norm": 1.3571187257766724, "learning_rate": 0.00019922213391038786, "loss": 1.857, "step": 479 }, { "epoch": 0.06875805758487323, "grad_norm": 1.3558197021484375, "learning_rate": 0.00019921634734908943, "loss": 1.8722, "step": 480 }, { "epoch": 0.06890130353817504, "grad_norm": 1.2936092615127563, "learning_rate": 0.00019921053942896665, "loss": 1.9151, "step": 481 }, { "epoch": 0.06904454949147687, "grad_norm": 1.6655749082565308, "learning_rate": 0.0001992047101512698, "loss": 1.9252, "step": 482 }, { "epoch": 0.06918779544477868, "grad_norm": 1.4114404916763306, "learning_rate": 0.00019919885951725374, "loss": 1.9171, "step": 483 }, { "epoch": 0.06933104139808051, "grad_norm": 1.4054932594299316, "learning_rate": 0.00019919298752817807, "loss": 1.7685, "step": 484 }, { "epoch": 0.06947428735138232, "grad_norm": 1.5407607555389404, "learning_rate": 0.0001991870941853068, "loss": 1.9321, "step": 485 }, { "epoch": 0.06961753330468415, "grad_norm": 1.5114518404006958, "learning_rate": 0.00019918117948990868, "loss": 1.681, "step": 486 }, { "epoch": 0.06976077925798596, "grad_norm": 1.1382354497909546, "learning_rate": 0.00019917524344325696, "loss": 1.9091, "step": 487 }, { "epoch": 0.06990402521128779, "grad_norm": 1.609876036643982, "learning_rate": 0.00019916928604662952, "loss": 1.8429, "step": 488 }, { "epoch": 0.0700472711645896, "grad_norm": 1.2324614524841309, "learning_rate": 0.0001991633073013089, "loss": 1.9848, "step": 489 }, { "epoch": 0.07019051711789141, "grad_norm": 1.5840328931808472, "learning_rate": 0.00019915730720858208, "loss": 1.8343, "step": 490 }, { "epoch": 0.07033376307119324, "grad_norm": 1.394176959991455, "learning_rate": 0.00019915128576974077, "loss": 1.874, "step": 491 }, { "epoch": 0.07047700902449505, "grad_norm": 1.4741060733795166, "learning_rate": 0.00019914524298608128, "loss": 1.9982, "step": 492 }, { "epoch": 0.07062025497779688, "grad_norm": 1.872179388999939, "learning_rate": 0.00019913917885890442, "loss": 1.87, "step": 493 }, { "epoch": 0.07076350093109869, "grad_norm": 1.457639217376709, "learning_rate": 0.00019913309338951568, "loss": 1.8309, "step": 494 }, { "epoch": 0.07090674688440052, "grad_norm": 1.4248993396759033, "learning_rate": 0.00019912698657922507, "loss": 1.8919, "step": 495 }, { "epoch": 0.07104999283770233, "grad_norm": 1.6535437107086182, "learning_rate": 0.0001991208584293473, "loss": 1.8902, "step": 496 }, { "epoch": 0.07119323879100416, "grad_norm": 1.2773194313049316, "learning_rate": 0.00019911470894120155, "loss": 1.7948, "step": 497 }, { "epoch": 0.07133648474430597, "grad_norm": 1.1379121541976929, "learning_rate": 0.0001991085381161117, "loss": 1.839, "step": 498 }, { "epoch": 0.0714797306976078, "grad_norm": 1.303226351737976, "learning_rate": 0.0001991023459554061, "loss": 1.8876, "step": 499 }, { "epoch": 0.07162297665090961, "grad_norm": 1.3400683403015137, "learning_rate": 0.00019909613246041788, "loss": 1.881, "step": 500 }, { "epoch": 0.07176622260421144, "grad_norm": 1.4628103971481323, "learning_rate": 0.00019908989763248454, "loss": 1.8523, "step": 501 }, { "epoch": 0.07190946855751325, "grad_norm": 1.6308717727661133, "learning_rate": 0.00019908364147294835, "loss": 1.7968, "step": 502 }, { "epoch": 0.07205271451081507, "grad_norm": 1.3459229469299316, "learning_rate": 0.00019907736398315607, "loss": 1.7511, "step": 503 }, { "epoch": 0.07219596046411689, "grad_norm": 1.5009410381317139, "learning_rate": 0.00019907106516445913, "loss": 1.916, "step": 504 }, { "epoch": 0.07233920641741871, "grad_norm": 1.1755175590515137, "learning_rate": 0.00019906474501821348, "loss": 1.9279, "step": 505 }, { "epoch": 0.07248245237072053, "grad_norm": 1.5116721391677856, "learning_rate": 0.00019905840354577972, "loss": 1.81, "step": 506 }, { "epoch": 0.07262569832402235, "grad_norm": 1.4349769353866577, "learning_rate": 0.00019905204074852298, "loss": 1.8853, "step": 507 }, { "epoch": 0.07276894427732417, "grad_norm": 1.420134425163269, "learning_rate": 0.000199045656627813, "loss": 1.7951, "step": 508 }, { "epoch": 0.07291219023062598, "grad_norm": 1.383614182472229, "learning_rate": 0.00019903925118502417, "loss": 1.7075, "step": 509 }, { "epoch": 0.0730554361839278, "grad_norm": 1.4356929063796997, "learning_rate": 0.0001990328244215354, "loss": 1.8192, "step": 510 }, { "epoch": 0.07319868213722962, "grad_norm": 1.1821198463439941, "learning_rate": 0.00019902637633873021, "loss": 1.9616, "step": 511 }, { "epoch": 0.07334192809053144, "grad_norm": 1.1920559406280518, "learning_rate": 0.00019901990693799672, "loss": 1.767, "step": 512 }, { "epoch": 0.07348517404383326, "grad_norm": 1.2896835803985596, "learning_rate": 0.00019901341622072764, "loss": 2.155, "step": 513 }, { "epoch": 0.07362841999713508, "grad_norm": 1.6448336839675903, "learning_rate": 0.00019900690418832022, "loss": 1.6994, "step": 514 }, { "epoch": 0.0737716659504369, "grad_norm": 1.4044462442398071, "learning_rate": 0.00019900037084217637, "loss": 1.7893, "step": 515 }, { "epoch": 0.07391491190373872, "grad_norm": 1.4587130546569824, "learning_rate": 0.0001989938161837026, "loss": 1.8875, "step": 516 }, { "epoch": 0.07405815785704054, "grad_norm": 1.2490543127059937, "learning_rate": 0.0001989872402143099, "loss": 1.7479, "step": 517 }, { "epoch": 0.07420140381034236, "grad_norm": 1.501863956451416, "learning_rate": 0.00019898064293541392, "loss": 1.9654, "step": 518 }, { "epoch": 0.07434464976364417, "grad_norm": 1.215704321861267, "learning_rate": 0.0001989740243484349, "loss": 1.8862, "step": 519 }, { "epoch": 0.074487895716946, "grad_norm": 1.5387451648712158, "learning_rate": 0.00019896738445479768, "loss": 1.7493, "step": 520 }, { "epoch": 0.07463114167024781, "grad_norm": 1.77334725856781, "learning_rate": 0.00019896072325593166, "loss": 1.9767, "step": 521 }, { "epoch": 0.07477438762354964, "grad_norm": 1.5325254201889038, "learning_rate": 0.0001989540407532708, "loss": 1.7749, "step": 522 }, { "epoch": 0.07491763357685145, "grad_norm": 1.309705376625061, "learning_rate": 0.00019894733694825374, "loss": 1.8098, "step": 523 }, { "epoch": 0.07506087953015328, "grad_norm": 1.413210391998291, "learning_rate": 0.00019894061184232357, "loss": 1.6865, "step": 524 }, { "epoch": 0.07520412548345509, "grad_norm": 1.2971785068511963, "learning_rate": 0.00019893386543692806, "loss": 2.0, "step": 525 }, { "epoch": 0.0753473714367569, "grad_norm": 1.5852457284927368, "learning_rate": 0.00019892709773351958, "loss": 1.7778, "step": 526 }, { "epoch": 0.07549061739005873, "grad_norm": 1.5320847034454346, "learning_rate": 0.00019892030873355497, "loss": 1.6361, "step": 527 }, { "epoch": 0.07563386334336054, "grad_norm": 1.4010851383209229, "learning_rate": 0.00019891349843849585, "loss": 1.7991, "step": 528 }, { "epoch": 0.07577710929666237, "grad_norm": 1.3538016080856323, "learning_rate": 0.0001989066668498082, "loss": 1.8997, "step": 529 }, { "epoch": 0.07592035524996418, "grad_norm": 1.4806427955627441, "learning_rate": 0.00019889981396896278, "loss": 1.8361, "step": 530 }, { "epoch": 0.07606360120326601, "grad_norm": 1.559334635734558, "learning_rate": 0.00019889293979743477, "loss": 1.6746, "step": 531 }, { "epoch": 0.07620684715656782, "grad_norm": 1.3477094173431396, "learning_rate": 0.00019888604433670405, "loss": 1.8048, "step": 532 }, { "epoch": 0.07635009310986965, "grad_norm": 1.4291020631790161, "learning_rate": 0.00019887912758825504, "loss": 1.9826, "step": 533 }, { "epoch": 0.07649333906317146, "grad_norm": 1.3956358432769775, "learning_rate": 0.00019887218955357675, "loss": 1.8392, "step": 534 }, { "epoch": 0.07663658501647329, "grad_norm": 1.3596224784851074, "learning_rate": 0.00019886523023416274, "loss": 1.8294, "step": 535 }, { "epoch": 0.0767798309697751, "grad_norm": 1.344411849975586, "learning_rate": 0.00019885824963151118, "loss": 1.9847, "step": 536 }, { "epoch": 0.07692307692307693, "grad_norm": 1.5432337522506714, "learning_rate": 0.00019885124774712487, "loss": 1.8735, "step": 537 }, { "epoch": 0.07706632287637874, "grad_norm": 1.4612058401107788, "learning_rate": 0.00019884422458251109, "loss": 1.8228, "step": 538 }, { "epoch": 0.07720956882968057, "grad_norm": 1.412908673286438, "learning_rate": 0.00019883718013918178, "loss": 1.8775, "step": 539 }, { "epoch": 0.07735281478298238, "grad_norm": 1.836916446685791, "learning_rate": 0.00019883011441865342, "loss": 1.938, "step": 540 }, { "epoch": 0.0774960607362842, "grad_norm": 1.7175601720809937, "learning_rate": 0.0001988230274224471, "loss": 1.6055, "step": 541 }, { "epoch": 0.07763930668958602, "grad_norm": 1.8685137033462524, "learning_rate": 0.00019881591915208845, "loss": 1.7533, "step": 542 }, { "epoch": 0.07778255264288784, "grad_norm": 1.7033354043960571, "learning_rate": 0.00019880878960910772, "loss": 1.8844, "step": 543 }, { "epoch": 0.07792579859618966, "grad_norm": 1.3682303428649902, "learning_rate": 0.00019880163879503973, "loss": 1.857, "step": 544 }, { "epoch": 0.07806904454949147, "grad_norm": 1.589799404144287, "learning_rate": 0.00019879446671142387, "loss": 2.0505, "step": 545 }, { "epoch": 0.0782122905027933, "grad_norm": 1.4113374948501587, "learning_rate": 0.00019878727335980413, "loss": 1.7401, "step": 546 }, { "epoch": 0.07835553645609511, "grad_norm": 1.4761561155319214, "learning_rate": 0.000198780058741729, "loss": 1.7849, "step": 547 }, { "epoch": 0.07849878240939694, "grad_norm": 1.312427282333374, "learning_rate": 0.0001987728228587517, "loss": 1.8672, "step": 548 }, { "epoch": 0.07864202836269875, "grad_norm": 1.3250832557678223, "learning_rate": 0.00019876556571242986, "loss": 1.7832, "step": 549 }, { "epoch": 0.07878527431600058, "grad_norm": 1.4705543518066406, "learning_rate": 0.0001987582873043258, "loss": 1.7986, "step": 550 }, { "epoch": 0.07892852026930239, "grad_norm": 1.3344213962554932, "learning_rate": 0.00019875098763600635, "loss": 1.9054, "step": 551 }, { "epoch": 0.07907176622260421, "grad_norm": 1.453491449356079, "learning_rate": 0.00019874366670904299, "loss": 1.6129, "step": 552 }, { "epoch": 0.07921501217590603, "grad_norm": 1.5356755256652832, "learning_rate": 0.00019873632452501168, "loss": 1.9653, "step": 553 }, { "epoch": 0.07935825812920785, "grad_norm": 1.9556617736816406, "learning_rate": 0.00019872896108549308, "loss": 1.7465, "step": 554 }, { "epoch": 0.07950150408250967, "grad_norm": 1.4671192169189453, "learning_rate": 0.00019872157639207232, "loss": 1.7662, "step": 555 }, { "epoch": 0.07964475003581149, "grad_norm": 1.8577529191970825, "learning_rate": 0.00019871417044633913, "loss": 1.8747, "step": 556 }, { "epoch": 0.0797879959891133, "grad_norm": 1.4766255617141724, "learning_rate": 0.00019870674324988781, "loss": 1.7978, "step": 557 }, { "epoch": 0.07993124194241513, "grad_norm": 1.6983734369277954, "learning_rate": 0.00019869929480431734, "loss": 1.7239, "step": 558 }, { "epoch": 0.08007448789571694, "grad_norm": 1.6475611925125122, "learning_rate": 0.00019869182511123113, "loss": 1.8591, "step": 559 }, { "epoch": 0.08021773384901877, "grad_norm": 2.0189778804779053, "learning_rate": 0.00019868433417223716, "loss": 1.8644, "step": 560 }, { "epoch": 0.08036097980232058, "grad_norm": 1.4063400030136108, "learning_rate": 0.00019867682198894816, "loss": 1.7548, "step": 561 }, { "epoch": 0.08050422575562241, "grad_norm": 1.583289623260498, "learning_rate": 0.00019866928856298119, "loss": 1.8043, "step": 562 }, { "epoch": 0.08064747170892422, "grad_norm": 1.55256986618042, "learning_rate": 0.00019866173389595813, "loss": 1.7083, "step": 563 }, { "epoch": 0.08079071766222604, "grad_norm": 1.4576290845870972, "learning_rate": 0.00019865415798950525, "loss": 1.9382, "step": 564 }, { "epoch": 0.08093396361552786, "grad_norm": 1.7685655355453491, "learning_rate": 0.00019864656084525345, "loss": 1.8558, "step": 565 }, { "epoch": 0.08107720956882968, "grad_norm": 2.316591501235962, "learning_rate": 0.00019863894246483822, "loss": 1.6276, "step": 566 }, { "epoch": 0.0812204555221315, "grad_norm": 1.6802818775177002, "learning_rate": 0.00019863130284989965, "loss": 1.8006, "step": 567 }, { "epoch": 0.08136370147543331, "grad_norm": 1.6730725765228271, "learning_rate": 0.0001986236420020823, "loss": 1.7016, "step": 568 }, { "epoch": 0.08150694742873514, "grad_norm": 1.8522088527679443, "learning_rate": 0.00019861595992303537, "loss": 1.7074, "step": 569 }, { "epoch": 0.08165019338203695, "grad_norm": 2.0142784118652344, "learning_rate": 0.00019860825661441266, "loss": 1.7981, "step": 570 }, { "epoch": 0.08179343933533878, "grad_norm": 1.587649941444397, "learning_rate": 0.00019860053207787246, "loss": 1.6049, "step": 571 }, { "epoch": 0.08193668528864059, "grad_norm": 1.9246728420257568, "learning_rate": 0.0001985927863150777, "loss": 1.7698, "step": 572 }, { "epoch": 0.08207993124194242, "grad_norm": 1.4553924798965454, "learning_rate": 0.0001985850193276958, "loss": 1.6821, "step": 573 }, { "epoch": 0.08222317719524423, "grad_norm": 1.7816945314407349, "learning_rate": 0.0001985772311173989, "loss": 1.6999, "step": 574 }, { "epoch": 0.08236642314854606, "grad_norm": 2.0161430835723877, "learning_rate": 0.00019856942168586353, "loss": 1.721, "step": 575 }, { "epoch": 0.08250966910184787, "grad_norm": 1.947036623954773, "learning_rate": 0.00019856159103477086, "loss": 1.6449, "step": 576 }, { "epoch": 0.0826529150551497, "grad_norm": 1.5800046920776367, "learning_rate": 0.00019855373916580668, "loss": 1.8299, "step": 577 }, { "epoch": 0.08279616100845151, "grad_norm": 1.7868382930755615, "learning_rate": 0.00019854586608066127, "loss": 1.7032, "step": 578 }, { "epoch": 0.08293940696175334, "grad_norm": 1.6116896867752075, "learning_rate": 0.00019853797178102952, "loss": 1.8266, "step": 579 }, { "epoch": 0.08308265291505515, "grad_norm": 1.9898512363433838, "learning_rate": 0.0001985300562686109, "loss": 1.6916, "step": 580 }, { "epoch": 0.08322589886835696, "grad_norm": 1.758905291557312, "learning_rate": 0.00019852211954510943, "loss": 1.7571, "step": 581 }, { "epoch": 0.08336914482165879, "grad_norm": 1.9583605527877808, "learning_rate": 0.0001985141616122336, "loss": 1.7946, "step": 582 }, { "epoch": 0.0835123907749606, "grad_norm": 2.251268148422241, "learning_rate": 0.00019850618247169667, "loss": 1.7275, "step": 583 }, { "epoch": 0.08365563672826243, "grad_norm": 1.9306137561798096, "learning_rate": 0.0001984981821252163, "loss": 1.6006, "step": 584 }, { "epoch": 0.08379888268156424, "grad_norm": 2.1751723289489746, "learning_rate": 0.00019849016057451476, "loss": 1.7321, "step": 585 }, { "epoch": 0.08394212863486607, "grad_norm": 1.992838740348816, "learning_rate": 0.00019848211782131888, "loss": 1.8335, "step": 586 }, { "epoch": 0.08408537458816788, "grad_norm": 2.430884599685669, "learning_rate": 0.00019847405386736014, "loss": 1.8502, "step": 587 }, { "epoch": 0.0842286205414697, "grad_norm": 1.7405625581741333, "learning_rate": 0.00019846596871437441, "loss": 1.6762, "step": 588 }, { "epoch": 0.08437186649477152, "grad_norm": 1.7707555294036865, "learning_rate": 0.00019845786236410227, "loss": 1.7992, "step": 589 }, { "epoch": 0.08451511244807335, "grad_norm": 2.1325972080230713, "learning_rate": 0.00019844973481828886, "loss": 1.703, "step": 590 }, { "epoch": 0.08465835840137516, "grad_norm": 2.0813379287719727, "learning_rate": 0.00019844158607868376, "loss": 1.6277, "step": 591 }, { "epoch": 0.08480160435467698, "grad_norm": 1.669659972190857, "learning_rate": 0.00019843341614704125, "loss": 1.806, "step": 592 }, { "epoch": 0.0849448503079788, "grad_norm": 2.092416763305664, "learning_rate": 0.00019842522502512008, "loss": 1.8738, "step": 593 }, { "epoch": 0.08508809626128062, "grad_norm": 1.4728245735168457, "learning_rate": 0.0001984170127146836, "loss": 1.8159, "step": 594 }, { "epoch": 0.08523134221458244, "grad_norm": 2.1759033203125, "learning_rate": 0.00019840877921749973, "loss": 1.7924, "step": 595 }, { "epoch": 0.08537458816788426, "grad_norm": 1.864659070968628, "learning_rate": 0.00019840052453534094, "loss": 1.8479, "step": 596 }, { "epoch": 0.08551783412118608, "grad_norm": 2.0173592567443848, "learning_rate": 0.00019839224866998424, "loss": 1.8085, "step": 597 }, { "epoch": 0.0856610800744879, "grad_norm": 1.7638306617736816, "learning_rate": 0.00019838395162321125, "loss": 1.866, "step": 598 }, { "epoch": 0.08580432602778972, "grad_norm": 1.443919062614441, "learning_rate": 0.0001983756333968081, "loss": 1.8115, "step": 599 }, { "epoch": 0.08594757198109153, "grad_norm": 1.8691120147705078, "learning_rate": 0.0001983672939925655, "loss": 1.6545, "step": 600 }, { "epoch": 0.08609081793439335, "grad_norm": 2.423405170440674, "learning_rate": 0.00019835893341227872, "loss": 1.7101, "step": 601 }, { "epoch": 0.08623406388769517, "grad_norm": 2.1188912391662598, "learning_rate": 0.00019835055165774756, "loss": 1.5544, "step": 602 }, { "epoch": 0.086377309840997, "grad_norm": 1.7928441762924194, "learning_rate": 0.00019834214873077643, "loss": 1.804, "step": 603 }, { "epoch": 0.0865205557942988, "grad_norm": 1.8053836822509766, "learning_rate": 0.00019833372463317427, "loss": 1.7467, "step": 604 }, { "epoch": 0.08666380174760063, "grad_norm": 2.1015563011169434, "learning_rate": 0.0001983252793667546, "loss": 1.599, "step": 605 }, { "epoch": 0.08680704770090245, "grad_norm": 1.765212059020996, "learning_rate": 0.00019831681293333545, "loss": 1.7277, "step": 606 }, { "epoch": 0.08695029365420427, "grad_norm": 1.675680160522461, "learning_rate": 0.00019830832533473939, "loss": 1.6561, "step": 607 }, { "epoch": 0.08709353960750608, "grad_norm": 2.0614521503448486, "learning_rate": 0.0001982998165727937, "loss": 1.7373, "step": 608 }, { "epoch": 0.08723678556080791, "grad_norm": 1.3164680004119873, "learning_rate": 0.00019829128664933002, "loss": 1.6432, "step": 609 }, { "epoch": 0.08738003151410972, "grad_norm": 1.9503114223480225, "learning_rate": 0.00019828273556618466, "loss": 1.7732, "step": 610 }, { "epoch": 0.08752327746741155, "grad_norm": 1.5411949157714844, "learning_rate": 0.00019827416332519844, "loss": 1.8937, "step": 611 }, { "epoch": 0.08766652342071336, "grad_norm": 1.9805026054382324, "learning_rate": 0.00019826556992821678, "loss": 1.7267, "step": 612 }, { "epoch": 0.08780976937401519, "grad_norm": 2.108997106552124, "learning_rate": 0.0001982569553770896, "loss": 1.6758, "step": 613 }, { "epoch": 0.087953015327317, "grad_norm": 1.4876041412353516, "learning_rate": 0.0001982483196736714, "loss": 1.7274, "step": 614 }, { "epoch": 0.08809626128061883, "grad_norm": 1.7633486986160278, "learning_rate": 0.00019823966281982128, "loss": 1.5463, "step": 615 }, { "epoch": 0.08823950723392064, "grad_norm": 1.9821653366088867, "learning_rate": 0.00019823098481740276, "loss": 1.6797, "step": 616 }, { "epoch": 0.08838275318722245, "grad_norm": 1.4849814176559448, "learning_rate": 0.0001982222856682841, "loss": 1.7138, "step": 617 }, { "epoch": 0.08852599914052428, "grad_norm": 2.2080535888671875, "learning_rate": 0.0001982135653743379, "loss": 1.7187, "step": 618 }, { "epoch": 0.0886692450938261, "grad_norm": 1.4369357824325562, "learning_rate": 0.00019820482393744152, "loss": 1.6414, "step": 619 }, { "epoch": 0.08881249104712792, "grad_norm": 1.4783408641815186, "learning_rate": 0.00019819606135947676, "loss": 1.5684, "step": 620 }, { "epoch": 0.08895573700042973, "grad_norm": 1.8696465492248535, "learning_rate": 0.00019818727764232992, "loss": 1.6218, "step": 621 }, { "epoch": 0.08909898295373156, "grad_norm": 1.7060953378677368, "learning_rate": 0.00019817847278789198, "loss": 1.6335, "step": 622 }, { "epoch": 0.08924222890703337, "grad_norm": 1.6891498565673828, "learning_rate": 0.00019816964679805838, "loss": 1.7383, "step": 623 }, { "epoch": 0.0893854748603352, "grad_norm": 2.057689905166626, "learning_rate": 0.00019816079967472915, "loss": 1.6232, "step": 624 }, { "epoch": 0.08952872081363701, "grad_norm": 1.500284194946289, "learning_rate": 0.00019815193141980886, "loss": 1.538, "step": 625 }, { "epoch": 0.08967196676693884, "grad_norm": 1.5171517133712769, "learning_rate": 0.0001981430420352066, "loss": 1.7768, "step": 626 }, { "epoch": 0.08981521272024065, "grad_norm": 1.9033976793289185, "learning_rate": 0.0001981341315228361, "loss": 1.605, "step": 627 }, { "epoch": 0.08995845867354248, "grad_norm": 1.3244998455047607, "learning_rate": 0.00019812519988461548, "loss": 1.7871, "step": 628 }, { "epoch": 0.09010170462684429, "grad_norm": 1.9696805477142334, "learning_rate": 0.00019811624712246756, "loss": 1.7835, "step": 629 }, { "epoch": 0.09024495058014612, "grad_norm": 2.000761032104492, "learning_rate": 0.00019810727323831966, "loss": 1.5602, "step": 630 }, { "epoch": 0.09038819653344793, "grad_norm": 1.6452912092208862, "learning_rate": 0.00019809827823410358, "loss": 1.7776, "step": 631 }, { "epoch": 0.09053144248674976, "grad_norm": 2.469255208969116, "learning_rate": 0.00019808926211175573, "loss": 1.5445, "step": 632 }, { "epoch": 0.09067468844005157, "grad_norm": 1.5338579416275024, "learning_rate": 0.0001980802248732171, "loss": 1.5889, "step": 633 }, { "epoch": 0.0908179343933534, "grad_norm": 2.191380023956299, "learning_rate": 0.0001980711665204332, "loss": 1.7019, "step": 634 }, { "epoch": 0.09096118034665521, "grad_norm": 1.311061978340149, "learning_rate": 0.00019806208705535402, "loss": 1.6471, "step": 635 }, { "epoch": 0.09110442629995702, "grad_norm": 1.5730485916137695, "learning_rate": 0.0001980529864799341, "loss": 1.7731, "step": 636 }, { "epoch": 0.09124767225325885, "grad_norm": 1.9627255201339722, "learning_rate": 0.0001980438647961327, "loss": 1.7838, "step": 637 }, { "epoch": 0.09139091820656066, "grad_norm": 1.708469271659851, "learning_rate": 0.00019803472200591337, "loss": 1.7174, "step": 638 }, { "epoch": 0.09153416415986249, "grad_norm": 1.6527167558670044, "learning_rate": 0.00019802555811124437, "loss": 1.6941, "step": 639 }, { "epoch": 0.0916774101131643, "grad_norm": 1.3351771831512451, "learning_rate": 0.00019801637311409848, "loss": 1.6088, "step": 640 }, { "epoch": 0.09182065606646612, "grad_norm": 1.586374044418335, "learning_rate": 0.000198007167016453, "loss": 1.6936, "step": 641 }, { "epoch": 0.09196390201976794, "grad_norm": 1.4951837062835693, "learning_rate": 0.00019799793982028976, "loss": 1.7632, "step": 642 }, { "epoch": 0.09210714797306976, "grad_norm": 1.4759860038757324, "learning_rate": 0.00019798869152759513, "loss": 1.6381, "step": 643 }, { "epoch": 0.09225039392637158, "grad_norm": 1.6470977067947388, "learning_rate": 0.00019797942214036009, "loss": 1.4551, "step": 644 }, { "epoch": 0.0923936398796734, "grad_norm": 1.4977290630340576, "learning_rate": 0.00019797013166058004, "loss": 1.7218, "step": 645 }, { "epoch": 0.09253688583297522, "grad_norm": 2.3927955627441406, "learning_rate": 0.00019796082009025505, "loss": 1.7845, "step": 646 }, { "epoch": 0.09268013178627704, "grad_norm": 2.4020349979400635, "learning_rate": 0.00019795148743138966, "loss": 1.6147, "step": 647 }, { "epoch": 0.09282337773957886, "grad_norm": 1.9723669290542603, "learning_rate": 0.00019794213368599294, "loss": 1.7094, "step": 648 }, { "epoch": 0.09296662369288068, "grad_norm": 1.9376848936080933, "learning_rate": 0.0001979327588560785, "loss": 1.6721, "step": 649 }, { "epoch": 0.0931098696461825, "grad_norm": 2.211782693862915, "learning_rate": 0.00019792336294366457, "loss": 1.6061, "step": 650 }, { "epoch": 0.09325311559948432, "grad_norm": 1.63835608959198, "learning_rate": 0.00019791394595077382, "loss": 1.6629, "step": 651 }, { "epoch": 0.09339636155278613, "grad_norm": 2.1630845069885254, "learning_rate": 0.0001979045078794335, "loss": 1.7264, "step": 652 }, { "epoch": 0.09353960750608795, "grad_norm": 1.556217074394226, "learning_rate": 0.00019789504873167537, "loss": 1.7157, "step": 653 }, { "epoch": 0.09368285345938977, "grad_norm": 1.9371320009231567, "learning_rate": 0.0001978855685095358, "loss": 1.6862, "step": 654 }, { "epoch": 0.09382609941269159, "grad_norm": 1.8358875513076782, "learning_rate": 0.0001978760672150556, "loss": 1.6879, "step": 655 }, { "epoch": 0.09396934536599341, "grad_norm": 1.7639013528823853, "learning_rate": 0.0001978665448502802, "loss": 1.8627, "step": 656 }, { "epoch": 0.09411259131929522, "grad_norm": 2.1679720878601074, "learning_rate": 0.00019785700141725953, "loss": 1.5589, "step": 657 }, { "epoch": 0.09425583727259705, "grad_norm": 1.6932626962661743, "learning_rate": 0.00019784743691804804, "loss": 1.5579, "step": 658 }, { "epoch": 0.09439908322589886, "grad_norm": 1.5645488500595093, "learning_rate": 0.0001978378513547047, "loss": 1.5725, "step": 659 }, { "epoch": 0.09454232917920069, "grad_norm": 1.2878520488739014, "learning_rate": 0.00019782824472929308, "loss": 1.6327, "step": 660 }, { "epoch": 0.0946855751325025, "grad_norm": 1.5387991666793823, "learning_rate": 0.00019781861704388124, "loss": 1.8507, "step": 661 }, { "epoch": 0.09482882108580433, "grad_norm": 1.7131320238113403, "learning_rate": 0.0001978089683005418, "loss": 1.508, "step": 662 }, { "epoch": 0.09497206703910614, "grad_norm": 1.4265942573547363, "learning_rate": 0.00019779929850135189, "loss": 1.5197, "step": 663 }, { "epoch": 0.09511531299240797, "grad_norm": 1.8079270124435425, "learning_rate": 0.00019778960764839316, "loss": 1.6645, "step": 664 }, { "epoch": 0.09525855894570978, "grad_norm": 1.3312954902648926, "learning_rate": 0.00019777989574375183, "loss": 1.5565, "step": 665 }, { "epoch": 0.09540180489901161, "grad_norm": 1.556467056274414, "learning_rate": 0.0001977701627895186, "loss": 1.746, "step": 666 }, { "epoch": 0.09554505085231342, "grad_norm": 1.7721006870269775, "learning_rate": 0.00019776040878778875, "loss": 1.5241, "step": 667 }, { "epoch": 0.09568829680561525, "grad_norm": 1.765176773071289, "learning_rate": 0.00019775063374066212, "loss": 1.5298, "step": 668 }, { "epoch": 0.09583154275891706, "grad_norm": 1.6512705087661743, "learning_rate": 0.000197740837650243, "loss": 1.6805, "step": 669 }, { "epoch": 0.09597478871221889, "grad_norm": 1.600159764289856, "learning_rate": 0.00019773102051864018, "loss": 1.5506, "step": 670 }, { "epoch": 0.0961180346655207, "grad_norm": 1.7851074934005737, "learning_rate": 0.00019772118234796717, "loss": 1.5179, "step": 671 }, { "epoch": 0.09626128061882251, "grad_norm": 2.052377939224243, "learning_rate": 0.0001977113231403418, "loss": 1.6381, "step": 672 }, { "epoch": 0.09640452657212434, "grad_norm": 1.7640641927719116, "learning_rate": 0.00019770144289788655, "loss": 1.6755, "step": 673 }, { "epoch": 0.09654777252542615, "grad_norm": 1.3730601072311401, "learning_rate": 0.00019769154162272839, "loss": 1.4196, "step": 674 }, { "epoch": 0.09669101847872798, "grad_norm": 1.3700706958770752, "learning_rate": 0.0001976816193169988, "loss": 1.6946, "step": 675 }, { "epoch": 0.09683426443202979, "grad_norm": 1.4719998836517334, "learning_rate": 0.0001976716759828338, "loss": 1.6281, "step": 676 }, { "epoch": 0.09697751038533162, "grad_norm": 1.3043746948242188, "learning_rate": 0.00019766171162237397, "loss": 1.807, "step": 677 }, { "epoch": 0.09712075633863343, "grad_norm": 1.6509284973144531, "learning_rate": 0.00019765172623776437, "loss": 1.6569, "step": 678 }, { "epoch": 0.09726400229193526, "grad_norm": 1.9164206981658936, "learning_rate": 0.00019764171983115462, "loss": 1.6634, "step": 679 }, { "epoch": 0.09740724824523707, "grad_norm": 1.4596762657165527, "learning_rate": 0.00019763169240469885, "loss": 1.6812, "step": 680 }, { "epoch": 0.0975504941985389, "grad_norm": 1.8942707777023315, "learning_rate": 0.00019762164396055573, "loss": 1.7299, "step": 681 }, { "epoch": 0.09769374015184071, "grad_norm": 1.4766433238983154, "learning_rate": 0.00019761157450088842, "loss": 1.9527, "step": 682 }, { "epoch": 0.09783698610514253, "grad_norm": 1.445786476135254, "learning_rate": 0.0001976014840278646, "loss": 1.6917, "step": 683 }, { "epoch": 0.09798023205844435, "grad_norm": 1.5590589046478271, "learning_rate": 0.00019759137254365657, "loss": 1.5339, "step": 684 }, { "epoch": 0.09812347801174617, "grad_norm": 1.8950871229171753, "learning_rate": 0.000197581240050441, "loss": 1.4936, "step": 685 }, { "epoch": 0.09826672396504799, "grad_norm": 1.9997422695159912, "learning_rate": 0.00019757108655039924, "loss": 1.6934, "step": 686 }, { "epoch": 0.09840996991834981, "grad_norm": 1.4285129308700562, "learning_rate": 0.00019756091204571708, "loss": 1.5747, "step": 687 }, { "epoch": 0.09855321587165163, "grad_norm": 1.658769965171814, "learning_rate": 0.00019755071653858476, "loss": 1.7785, "step": 688 }, { "epoch": 0.09869646182495345, "grad_norm": 1.8999029397964478, "learning_rate": 0.00019754050003119723, "loss": 1.5716, "step": 689 }, { "epoch": 0.09883970777825526, "grad_norm": 1.6866711378097534, "learning_rate": 0.00019753026252575375, "loss": 1.8494, "step": 690 }, { "epoch": 0.09898295373155708, "grad_norm": 1.254889726638794, "learning_rate": 0.00019752000402445825, "loss": 1.7504, "step": 691 }, { "epoch": 0.0991261996848589, "grad_norm": 1.7436747550964355, "learning_rate": 0.00019750972452951918, "loss": 1.5608, "step": 692 }, { "epoch": 0.09926944563816072, "grad_norm": 1.343024492263794, "learning_rate": 0.00019749942404314935, "loss": 1.7148, "step": 693 }, { "epoch": 0.09941269159146254, "grad_norm": 1.7251167297363281, "learning_rate": 0.00019748910256756628, "loss": 1.5804, "step": 694 }, { "epoch": 0.09955593754476436, "grad_norm": 1.5857722759246826, "learning_rate": 0.00019747876010499192, "loss": 1.6637, "step": 695 }, { "epoch": 0.09969918349806618, "grad_norm": 1.2661147117614746, "learning_rate": 0.0001974683966576527, "loss": 1.6412, "step": 696 }, { "epoch": 0.099842429451368, "grad_norm": 1.3366247415542603, "learning_rate": 0.00019745801222777968, "loss": 1.6925, "step": 697 }, { "epoch": 0.09998567540466982, "grad_norm": 1.6041820049285889, "learning_rate": 0.00019744760681760832, "loss": 1.5448, "step": 698 }, { "epoch": 0.10012892135797163, "grad_norm": 1.5244828462600708, "learning_rate": 0.0001974371804293787, "loss": 1.5587, "step": 699 }, { "epoch": 0.10027216731127346, "grad_norm": 1.45596444606781, "learning_rate": 0.0001974267330653353, "loss": 1.634, "step": 700 }, { "epoch": 0.10041541326457527, "grad_norm": 1.7106397151947021, "learning_rate": 0.00019741626472772722, "loss": 1.7385, "step": 701 }, { "epoch": 0.1005586592178771, "grad_norm": 1.6318823099136353, "learning_rate": 0.000197405775418808, "loss": 1.6464, "step": 702 }, { "epoch": 0.10070190517117891, "grad_norm": 1.9525867700576782, "learning_rate": 0.00019739526514083578, "loss": 1.727, "step": 703 }, { "epoch": 0.10084515112448074, "grad_norm": 1.8090976476669312, "learning_rate": 0.00019738473389607314, "loss": 1.5947, "step": 704 }, { "epoch": 0.10098839707778255, "grad_norm": 1.7600536346435547, "learning_rate": 0.00019737418168678714, "loss": 1.5912, "step": 705 }, { "epoch": 0.10113164303108438, "grad_norm": 1.6049998998641968, "learning_rate": 0.0001973636085152495, "loss": 1.5044, "step": 706 }, { "epoch": 0.10127488898438619, "grad_norm": 1.4004878997802734, "learning_rate": 0.00019735301438373633, "loss": 1.6854, "step": 707 }, { "epoch": 0.101418134937688, "grad_norm": 1.7660269737243652, "learning_rate": 0.00019734239929452825, "loss": 1.5787, "step": 708 }, { "epoch": 0.10156138089098983, "grad_norm": 1.438454508781433, "learning_rate": 0.00019733176324991046, "loss": 1.8692, "step": 709 }, { "epoch": 0.10170462684429164, "grad_norm": 1.228301763534546, "learning_rate": 0.00019732110625217262, "loss": 1.7537, "step": 710 }, { "epoch": 0.10184787279759347, "grad_norm": 1.4191251993179321, "learning_rate": 0.00019731042830360896, "loss": 1.5946, "step": 711 }, { "epoch": 0.10199111875089528, "grad_norm": 1.5328761339187622, "learning_rate": 0.0001972997294065181, "loss": 1.6684, "step": 712 }, { "epoch": 0.10213436470419711, "grad_norm": 1.6361255645751953, "learning_rate": 0.0001972890095632033, "loss": 1.6493, "step": 713 }, { "epoch": 0.10227761065749892, "grad_norm": 1.4105583429336548, "learning_rate": 0.00019727826877597232, "loss": 1.6317, "step": 714 }, { "epoch": 0.10242085661080075, "grad_norm": 1.3408552408218384, "learning_rate": 0.0001972675070471373, "loss": 1.6468, "step": 715 }, { "epoch": 0.10256410256410256, "grad_norm": 1.877536416053772, "learning_rate": 0.000197256724379015, "loss": 1.5974, "step": 716 }, { "epoch": 0.10270734851740439, "grad_norm": 1.6134400367736816, "learning_rate": 0.0001972459207739267, "loss": 1.6142, "step": 717 }, { "epoch": 0.1028505944707062, "grad_norm": 1.2656651735305786, "learning_rate": 0.00019723509623419808, "loss": 1.7536, "step": 718 }, { "epoch": 0.10299384042400803, "grad_norm": 1.6002271175384521, "learning_rate": 0.00019722425076215946, "loss": 1.6491, "step": 719 }, { "epoch": 0.10313708637730984, "grad_norm": 1.566291093826294, "learning_rate": 0.00019721338436014558, "loss": 1.5691, "step": 720 }, { "epoch": 0.10328033233061167, "grad_norm": 1.2599036693572998, "learning_rate": 0.00019720249703049573, "loss": 1.8011, "step": 721 }, { "epoch": 0.10342357828391348, "grad_norm": 1.5381052494049072, "learning_rate": 0.0001971915887755536, "loss": 1.5195, "step": 722 }, { "epoch": 0.1035668242372153, "grad_norm": 1.434147834777832, "learning_rate": 0.00019718065959766756, "loss": 1.5305, "step": 723 }, { "epoch": 0.10371007019051712, "grad_norm": 1.3286505937576294, "learning_rate": 0.00019716970949919035, "loss": 1.6205, "step": 724 }, { "epoch": 0.10385331614381894, "grad_norm": 1.2326021194458008, "learning_rate": 0.00019715873848247928, "loss": 1.6684, "step": 725 }, { "epoch": 0.10399656209712076, "grad_norm": 1.330072283744812, "learning_rate": 0.0001971477465498961, "loss": 1.5842, "step": 726 }, { "epoch": 0.10413980805042257, "grad_norm": 1.620344877243042, "learning_rate": 0.00019713673370380712, "loss": 1.5601, "step": 727 }, { "epoch": 0.1042830540037244, "grad_norm": 1.6700549125671387, "learning_rate": 0.00019712569994658315, "loss": 1.5271, "step": 728 }, { "epoch": 0.10442629995702621, "grad_norm": 1.8878201246261597, "learning_rate": 0.00019711464528059946, "loss": 1.3555, "step": 729 }, { "epoch": 0.10456954591032804, "grad_norm": 1.4124029874801636, "learning_rate": 0.00019710356970823587, "loss": 1.6412, "step": 730 }, { "epoch": 0.10471279186362985, "grad_norm": 1.7656874656677246, "learning_rate": 0.00019709247323187662, "loss": 1.6317, "step": 731 }, { "epoch": 0.10485603781693167, "grad_norm": 1.097822904586792, "learning_rate": 0.0001970813558539106, "loss": 1.6414, "step": 732 }, { "epoch": 0.10499928377023349, "grad_norm": 1.4732197523117065, "learning_rate": 0.00019707021757673103, "loss": 1.6305, "step": 733 }, { "epoch": 0.10514252972353531, "grad_norm": 1.3448829650878906, "learning_rate": 0.00019705905840273572, "loss": 1.6061, "step": 734 }, { "epoch": 0.10528577567683713, "grad_norm": 1.3394949436187744, "learning_rate": 0.00019704787833432698, "loss": 1.6372, "step": 735 }, { "epoch": 0.10542902163013895, "grad_norm": 1.4685827493667603, "learning_rate": 0.00019703667737391162, "loss": 1.9355, "step": 736 }, { "epoch": 0.10557226758344077, "grad_norm": 1.8580946922302246, "learning_rate": 0.00019702545552390089, "loss": 1.6555, "step": 737 }, { "epoch": 0.10571551353674259, "grad_norm": 1.9116902351379395, "learning_rate": 0.00019701421278671058, "loss": 1.8081, "step": 738 }, { "epoch": 0.1058587594900444, "grad_norm": 1.2292426824569702, "learning_rate": 0.000197002949164761, "loss": 1.7138, "step": 739 }, { "epoch": 0.10600200544334623, "grad_norm": 1.3974497318267822, "learning_rate": 0.00019699166466047692, "loss": 1.5291, "step": 740 }, { "epoch": 0.10614525139664804, "grad_norm": 1.386409878730774, "learning_rate": 0.0001969803592762876, "loss": 1.652, "step": 741 }, { "epoch": 0.10628849734994987, "grad_norm": 1.6884740591049194, "learning_rate": 0.0001969690330146268, "loss": 1.7535, "step": 742 }, { "epoch": 0.10643174330325168, "grad_norm": 1.5742979049682617, "learning_rate": 0.00019695768587793286, "loss": 1.8504, "step": 743 }, { "epoch": 0.1065749892565535, "grad_norm": 1.4850658178329468, "learning_rate": 0.00019694631786864843, "loss": 1.723, "step": 744 }, { "epoch": 0.10671823520985532, "grad_norm": 1.238373875617981, "learning_rate": 0.00019693492898922084, "loss": 1.6498, "step": 745 }, { "epoch": 0.10686148116315713, "grad_norm": 2.092233180999756, "learning_rate": 0.00019692351924210176, "loss": 1.6978, "step": 746 }, { "epoch": 0.10700472711645896, "grad_norm": 2.1850333213806152, "learning_rate": 0.00019691208862974752, "loss": 1.4748, "step": 747 }, { "epoch": 0.10714797306976077, "grad_norm": 1.5780452489852905, "learning_rate": 0.0001969006371546188, "loss": 1.6114, "step": 748 }, { "epoch": 0.1072912190230626, "grad_norm": 1.6323115825653076, "learning_rate": 0.0001968891648191808, "loss": 1.5691, "step": 749 }, { "epoch": 0.10743446497636441, "grad_norm": 1.273373007774353, "learning_rate": 0.00019687767162590328, "loss": 1.6624, "step": 750 }, { "epoch": 0.10757771092966624, "grad_norm": 1.3526626825332642, "learning_rate": 0.00019686615757726034, "loss": 1.6806, "step": 751 }, { "epoch": 0.10772095688296805, "grad_norm": 1.477441668510437, "learning_rate": 0.0001968546226757308, "loss": 1.7353, "step": 752 }, { "epoch": 0.10786420283626988, "grad_norm": 1.4391818046569824, "learning_rate": 0.00019684306692379776, "loss": 1.6659, "step": 753 }, { "epoch": 0.10800744878957169, "grad_norm": 1.4352343082427979, "learning_rate": 0.0001968314903239489, "loss": 1.4964, "step": 754 }, { "epoch": 0.10815069474287352, "grad_norm": 1.4778071641921997, "learning_rate": 0.00019681989287867636, "loss": 1.6588, "step": 755 }, { "epoch": 0.10829394069617533, "grad_norm": 1.8458248376846313, "learning_rate": 0.00019680827459047685, "loss": 1.6013, "step": 756 }, { "epoch": 0.10843718664947716, "grad_norm": 1.472934365272522, "learning_rate": 0.00019679663546185144, "loss": 1.5839, "step": 757 }, { "epoch": 0.10858043260277897, "grad_norm": 1.2686278820037842, "learning_rate": 0.00019678497549530574, "loss": 1.7562, "step": 758 }, { "epoch": 0.1087236785560808, "grad_norm": 1.8563710451126099, "learning_rate": 0.0001967732946933499, "loss": 1.5606, "step": 759 }, { "epoch": 0.10886692450938261, "grad_norm": 1.3085123300552368, "learning_rate": 0.00019676159305849846, "loss": 1.6645, "step": 760 }, { "epoch": 0.10901017046268444, "grad_norm": 1.513801097869873, "learning_rate": 0.0001967498705932705, "loss": 1.6237, "step": 761 }, { "epoch": 0.10915341641598625, "grad_norm": 1.6214598417282104, "learning_rate": 0.0001967381273001896, "loss": 1.6615, "step": 762 }, { "epoch": 0.10929666236928806, "grad_norm": 1.496747374534607, "learning_rate": 0.00019672636318178381, "loss": 1.5546, "step": 763 }, { "epoch": 0.10943990832258989, "grad_norm": 1.3868905305862427, "learning_rate": 0.0001967145782405856, "loss": 1.6734, "step": 764 }, { "epoch": 0.1095831542758917, "grad_norm": 1.9769505262374878, "learning_rate": 0.00019670277247913205, "loss": 1.6059, "step": 765 }, { "epoch": 0.10972640022919353, "grad_norm": 1.4846587181091309, "learning_rate": 0.00019669094589996457, "loss": 1.6523, "step": 766 }, { "epoch": 0.10986964618249534, "grad_norm": 1.6699373722076416, "learning_rate": 0.00019667909850562917, "loss": 1.4849, "step": 767 }, { "epoch": 0.11001289213579717, "grad_norm": 1.1695612668991089, "learning_rate": 0.00019666723029867632, "loss": 1.5853, "step": 768 }, { "epoch": 0.11015613808909898, "grad_norm": 1.2621443271636963, "learning_rate": 0.00019665534128166092, "loss": 1.7378, "step": 769 }, { "epoch": 0.1102993840424008, "grad_norm": 1.462565302848816, "learning_rate": 0.0001966434314571424, "loss": 1.7238, "step": 770 }, { "epoch": 0.11044262999570262, "grad_norm": 1.6089022159576416, "learning_rate": 0.00019663150082768462, "loss": 1.6665, "step": 771 }, { "epoch": 0.11058587594900444, "grad_norm": 1.5095402002334595, "learning_rate": 0.000196619549395856, "loss": 1.5419, "step": 772 }, { "epoch": 0.11072912190230626, "grad_norm": 1.401183009147644, "learning_rate": 0.00019660757716422932, "loss": 1.4786, "step": 773 }, { "epoch": 0.11087236785560808, "grad_norm": 1.3202089071273804, "learning_rate": 0.00019659558413538198, "loss": 1.6672, "step": 774 }, { "epoch": 0.1110156138089099, "grad_norm": 1.6599305868148804, "learning_rate": 0.0001965835703118957, "loss": 1.648, "step": 775 }, { "epoch": 0.11115885976221172, "grad_norm": 1.1583521366119385, "learning_rate": 0.00019657153569635683, "loss": 1.7374, "step": 776 }, { "epoch": 0.11130210571551354, "grad_norm": 1.4404739141464233, "learning_rate": 0.0001965594802913561, "loss": 1.7739, "step": 777 }, { "epoch": 0.11144535166881536, "grad_norm": 1.8734699487686157, "learning_rate": 0.00019654740409948872, "loss": 1.5631, "step": 778 }, { "epoch": 0.11158859762211717, "grad_norm": 1.410055160522461, "learning_rate": 0.00019653530712335443, "loss": 1.5709, "step": 779 }, { "epoch": 0.11173184357541899, "grad_norm": 1.6245243549346924, "learning_rate": 0.00019652318936555743, "loss": 1.7171, "step": 780 }, { "epoch": 0.11187508952872081, "grad_norm": 1.3341032266616821, "learning_rate": 0.00019651105082870628, "loss": 1.5142, "step": 781 }, { "epoch": 0.11201833548202263, "grad_norm": 1.5533653497695923, "learning_rate": 0.00019649889151541417, "loss": 1.5558, "step": 782 }, { "epoch": 0.11216158143532445, "grad_norm": 1.6404037475585938, "learning_rate": 0.00019648671142829876, "loss": 1.6398, "step": 783 }, { "epoch": 0.11230482738862627, "grad_norm": 1.6427288055419922, "learning_rate": 0.000196474510569982, "loss": 1.618, "step": 784 }, { "epoch": 0.11244807334192809, "grad_norm": 1.4234371185302734, "learning_rate": 0.0001964622889430905, "loss": 1.3855, "step": 785 }, { "epoch": 0.1125913192952299, "grad_norm": 1.3232643604278564, "learning_rate": 0.00019645004655025522, "loss": 1.6925, "step": 786 }, { "epoch": 0.11273456524853173, "grad_norm": 1.4328253269195557, "learning_rate": 0.00019643778339411175, "loss": 1.7753, "step": 787 }, { "epoch": 0.11287781120183354, "grad_norm": 1.5762733221054077, "learning_rate": 0.00019642549947729992, "loss": 1.6083, "step": 788 }, { "epoch": 0.11302105715513537, "grad_norm": 1.3697439432144165, "learning_rate": 0.00019641319480246424, "loss": 1.5214, "step": 789 }, { "epoch": 0.11316430310843718, "grad_norm": 1.1820030212402344, "learning_rate": 0.0001964008693722536, "loss": 1.6088, "step": 790 }, { "epoch": 0.11330754906173901, "grad_norm": 1.2939660549163818, "learning_rate": 0.0001963885231893213, "loss": 1.4251, "step": 791 }, { "epoch": 0.11345079501504082, "grad_norm": 1.9614722728729248, "learning_rate": 0.00019637615625632524, "loss": 1.7844, "step": 792 }, { "epoch": 0.11359404096834265, "grad_norm": 1.4284573793411255, "learning_rate": 0.00019636376857592764, "loss": 1.8043, "step": 793 }, { "epoch": 0.11373728692164446, "grad_norm": 1.34441077709198, "learning_rate": 0.00019635136015079533, "loss": 1.7269, "step": 794 }, { "epoch": 0.11388053287494629, "grad_norm": 1.8618592023849487, "learning_rate": 0.00019633893098359951, "loss": 1.5111, "step": 795 }, { "epoch": 0.1140237788282481, "grad_norm": 1.511165738105774, "learning_rate": 0.00019632648107701585, "loss": 1.6387, "step": 796 }, { "epoch": 0.11416702478154993, "grad_norm": 1.1874998807907104, "learning_rate": 0.00019631401043372447, "loss": 1.7597, "step": 797 }, { "epoch": 0.11431027073485174, "grad_norm": 1.4529027938842773, "learning_rate": 0.00019630151905641012, "loss": 1.515, "step": 798 }, { "epoch": 0.11445351668815355, "grad_norm": 1.3633545637130737, "learning_rate": 0.00019628900694776177, "loss": 1.5391, "step": 799 }, { "epoch": 0.11459676264145538, "grad_norm": 1.3726894855499268, "learning_rate": 0.00019627647411047305, "loss": 1.5234, "step": 800 }, { "epoch": 0.11474000859475719, "grad_norm": 1.7441984415054321, "learning_rate": 0.0001962639205472419, "loss": 1.6786, "step": 801 }, { "epoch": 0.11488325454805902, "grad_norm": 1.3965880870819092, "learning_rate": 0.00019625134626077083, "loss": 1.6971, "step": 802 }, { "epoch": 0.11502650050136083, "grad_norm": 1.2910364866256714, "learning_rate": 0.00019623875125376674, "loss": 1.7357, "step": 803 }, { "epoch": 0.11516974645466266, "grad_norm": 1.306301236152649, "learning_rate": 0.00019622613552894107, "loss": 1.6423, "step": 804 }, { "epoch": 0.11531299240796447, "grad_norm": 1.4404336214065552, "learning_rate": 0.0001962134990890096, "loss": 1.4665, "step": 805 }, { "epoch": 0.1154562383612663, "grad_norm": 1.2204550504684448, "learning_rate": 0.00019620084193669275, "loss": 1.6481, "step": 806 }, { "epoch": 0.11559948431456811, "grad_norm": 1.4873749017715454, "learning_rate": 0.00019618816407471519, "loss": 1.352, "step": 807 }, { "epoch": 0.11574273026786994, "grad_norm": 1.5756500959396362, "learning_rate": 0.00019617546550580622, "loss": 1.5499, "step": 808 }, { "epoch": 0.11588597622117175, "grad_norm": 1.172492265701294, "learning_rate": 0.0001961627462326995, "loss": 1.6223, "step": 809 }, { "epoch": 0.11602922217447358, "grad_norm": 1.3477461338043213, "learning_rate": 0.00019615000625813314, "loss": 1.4655, "step": 810 }, { "epoch": 0.11617246812777539, "grad_norm": 1.4703794717788696, "learning_rate": 0.0001961372455848498, "loss": 1.6865, "step": 811 }, { "epoch": 0.11631571408107721, "grad_norm": 1.4132351875305176, "learning_rate": 0.0001961244642155965, "loss": 1.5305, "step": 812 }, { "epoch": 0.11645896003437903, "grad_norm": 1.4192314147949219, "learning_rate": 0.0001961116621531248, "loss": 1.6134, "step": 813 }, { "epoch": 0.11660220598768085, "grad_norm": 1.3303085565567017, "learning_rate": 0.00019609883940019057, "loss": 1.6348, "step": 814 }, { "epoch": 0.11674545194098267, "grad_norm": 1.469582438468933, "learning_rate": 0.00019608599595955436, "loss": 1.5101, "step": 815 }, { "epoch": 0.11688869789428448, "grad_norm": 1.5950562953948975, "learning_rate": 0.00019607313183398094, "loss": 1.6845, "step": 816 }, { "epoch": 0.1170319438475863, "grad_norm": 1.4950788021087646, "learning_rate": 0.0001960602470262397, "loss": 1.6995, "step": 817 }, { "epoch": 0.11717518980088812, "grad_norm": 1.3118116855621338, "learning_rate": 0.00019604734153910437, "loss": 1.6796, "step": 818 }, { "epoch": 0.11731843575418995, "grad_norm": 1.517374873161316, "learning_rate": 0.00019603441537535324, "loss": 1.5008, "step": 819 }, { "epoch": 0.11746168170749176, "grad_norm": 1.8473433256149292, "learning_rate": 0.00019602146853776894, "loss": 1.7425, "step": 820 }, { "epoch": 0.11760492766079358, "grad_norm": 1.3010900020599365, "learning_rate": 0.00019600850102913865, "loss": 1.6855, "step": 821 }, { "epoch": 0.1177481736140954, "grad_norm": 1.3770560026168823, "learning_rate": 0.00019599551285225393, "loss": 1.7055, "step": 822 }, { "epoch": 0.11789141956739722, "grad_norm": 1.4143677949905396, "learning_rate": 0.00019598250400991077, "loss": 1.5576, "step": 823 }, { "epoch": 0.11803466552069904, "grad_norm": 1.4342479705810547, "learning_rate": 0.00019596947450490975, "loss": 1.6647, "step": 824 }, { "epoch": 0.11817791147400086, "grad_norm": 1.639679193496704, "learning_rate": 0.0001959564243400557, "loss": 1.6066, "step": 825 }, { "epoch": 0.11832115742730268, "grad_norm": 1.459906816482544, "learning_rate": 0.00019594335351815807, "loss": 1.5834, "step": 826 }, { "epoch": 0.1184644033806045, "grad_norm": 1.5267376899719238, "learning_rate": 0.00019593026204203066, "loss": 1.555, "step": 827 }, { "epoch": 0.11860764933390631, "grad_norm": 1.8674683570861816, "learning_rate": 0.0001959171499144917, "loss": 1.5765, "step": 828 }, { "epoch": 0.11875089528720814, "grad_norm": 1.6813234090805054, "learning_rate": 0.00019590401713836397, "loss": 1.6752, "step": 829 }, { "epoch": 0.11889414124050995, "grad_norm": 1.5916492938995361, "learning_rate": 0.0001958908637164746, "loss": 1.5171, "step": 830 }, { "epoch": 0.11903738719381178, "grad_norm": 1.5751780271530151, "learning_rate": 0.0001958776896516552, "loss": 1.7203, "step": 831 }, { "epoch": 0.1191806331471136, "grad_norm": 1.3422431945800781, "learning_rate": 0.00019586449494674187, "loss": 1.5944, "step": 832 }, { "epoch": 0.11932387910041542, "grad_norm": 1.532328486442566, "learning_rate": 0.000195851279604575, "loss": 1.4551, "step": 833 }, { "epoch": 0.11946712505371723, "grad_norm": 1.6953420639038086, "learning_rate": 0.0001958380436279996, "loss": 1.5568, "step": 834 }, { "epoch": 0.11961037100701905, "grad_norm": 1.657399296760559, "learning_rate": 0.000195824787019865, "loss": 1.5253, "step": 835 }, { "epoch": 0.11975361696032087, "grad_norm": 1.3738839626312256, "learning_rate": 0.0001958115097830251, "loss": 1.4798, "step": 836 }, { "epoch": 0.11989686291362268, "grad_norm": 1.2179100513458252, "learning_rate": 0.0001957982119203381, "loss": 1.4871, "step": 837 }, { "epoch": 0.12004010886692451, "grad_norm": 1.6310373544692993, "learning_rate": 0.0001957848934346667, "loss": 1.5856, "step": 838 }, { "epoch": 0.12018335482022632, "grad_norm": 1.4978235960006714, "learning_rate": 0.00019577155432887804, "loss": 1.7682, "step": 839 }, { "epoch": 0.12032660077352815, "grad_norm": 1.4542731046676636, "learning_rate": 0.00019575819460584373, "loss": 1.4687, "step": 840 }, { "epoch": 0.12046984672682996, "grad_norm": 1.1406450271606445, "learning_rate": 0.00019574481426843976, "loss": 1.6196, "step": 841 }, { "epoch": 0.12061309268013179, "grad_norm": 1.8530430793762207, "learning_rate": 0.0001957314133195466, "loss": 1.4314, "step": 842 }, { "epoch": 0.1207563386334336, "grad_norm": 1.730963110923767, "learning_rate": 0.00019571799176204912, "loss": 1.5497, "step": 843 }, { "epoch": 0.12089958458673543, "grad_norm": 1.4124916791915894, "learning_rate": 0.00019570454959883668, "loss": 1.6568, "step": 844 }, { "epoch": 0.12104283054003724, "grad_norm": 1.5251939296722412, "learning_rate": 0.00019569108683280303, "loss": 1.608, "step": 845 }, { "epoch": 0.12118607649333907, "grad_norm": 1.287947177886963, "learning_rate": 0.00019567760346684638, "loss": 1.6271, "step": 846 }, { "epoch": 0.12132932244664088, "grad_norm": 1.4431467056274414, "learning_rate": 0.00019566409950386935, "loss": 1.273, "step": 847 }, { "epoch": 0.12147256839994271, "grad_norm": 1.4683396816253662, "learning_rate": 0.000195650574946779, "loss": 1.7495, "step": 848 }, { "epoch": 0.12161581435324452, "grad_norm": 1.6172163486480713, "learning_rate": 0.00019563702979848686, "loss": 1.6144, "step": 849 }, { "epoch": 0.12175906030654635, "grad_norm": 1.5742977857589722, "learning_rate": 0.00019562346406190888, "loss": 1.5745, "step": 850 }, { "epoch": 0.12190230625984816, "grad_norm": 1.3044545650482178, "learning_rate": 0.00019560987773996536, "loss": 1.6523, "step": 851 }, { "epoch": 0.12204555221314999, "grad_norm": 1.6706430912017822, "learning_rate": 0.00019559627083558115, "loss": 1.5829, "step": 852 }, { "epoch": 0.1221887981664518, "grad_norm": 1.5556046962738037, "learning_rate": 0.00019558264335168548, "loss": 1.5973, "step": 853 }, { "epoch": 0.12233204411975361, "grad_norm": 1.3522742986679077, "learning_rate": 0.000195568995291212, "loss": 1.5215, "step": 854 }, { "epoch": 0.12247529007305544, "grad_norm": 2.2802860736846924, "learning_rate": 0.00019555532665709878, "loss": 1.6063, "step": 855 }, { "epoch": 0.12261853602635725, "grad_norm": 1.3602712154388428, "learning_rate": 0.0001955416374522884, "loss": 1.6613, "step": 856 }, { "epoch": 0.12276178197965908, "grad_norm": 1.4493857622146606, "learning_rate": 0.00019552792767972771, "loss": 1.5722, "step": 857 }, { "epoch": 0.12290502793296089, "grad_norm": 1.3390328884124756, "learning_rate": 0.00019551419734236818, "loss": 1.593, "step": 858 }, { "epoch": 0.12304827388626272, "grad_norm": 1.2195194959640503, "learning_rate": 0.00019550044644316557, "loss": 1.598, "step": 859 }, { "epoch": 0.12319151983956453, "grad_norm": 1.470925211906433, "learning_rate": 0.0001954866749850801, "loss": 1.5284, "step": 860 }, { "epoch": 0.12333476579286635, "grad_norm": 1.6146818399429321, "learning_rate": 0.0001954728829710764, "loss": 1.4087, "step": 861 }, { "epoch": 0.12347801174616817, "grad_norm": 1.2974482774734497, "learning_rate": 0.00019545907040412363, "loss": 1.5172, "step": 862 }, { "epoch": 0.12362125769947, "grad_norm": 1.5922187566757202, "learning_rate": 0.00019544523728719525, "loss": 1.6491, "step": 863 }, { "epoch": 0.1237645036527718, "grad_norm": 1.5290395021438599, "learning_rate": 0.00019543138362326917, "loss": 1.6143, "step": 864 }, { "epoch": 0.12390774960607363, "grad_norm": 1.4779689311981201, "learning_rate": 0.00019541750941532774, "loss": 1.5974, "step": 865 }, { "epoch": 0.12405099555937545, "grad_norm": 1.5644104480743408, "learning_rate": 0.00019540361466635777, "loss": 1.5176, "step": 866 }, { "epoch": 0.12419424151267727, "grad_norm": 1.3992775678634644, "learning_rate": 0.00019538969937935044, "loss": 1.7475, "step": 867 }, { "epoch": 0.12433748746597909, "grad_norm": 1.1582415103912354, "learning_rate": 0.00019537576355730134, "loss": 1.601, "step": 868 }, { "epoch": 0.12448073341928091, "grad_norm": 1.3258936405181885, "learning_rate": 0.00019536180720321054, "loss": 1.5256, "step": 869 }, { "epoch": 0.12462397937258272, "grad_norm": 1.5257527828216553, "learning_rate": 0.00019534783032008248, "loss": 1.6944, "step": 870 }, { "epoch": 0.12476722532588454, "grad_norm": 1.726912260055542, "learning_rate": 0.00019533383291092606, "loss": 1.5629, "step": 871 }, { "epoch": 0.12491047127918636, "grad_norm": 1.3510074615478516, "learning_rate": 0.00019531981497875454, "loss": 1.5718, "step": 872 }, { "epoch": 0.12505371723248818, "grad_norm": 1.416359543800354, "learning_rate": 0.00019530577652658568, "loss": 1.3863, "step": 873 }, { "epoch": 0.12519696318579, "grad_norm": 1.2144155502319336, "learning_rate": 0.00019529171755744158, "loss": 1.5797, "step": 874 }, { "epoch": 0.12534020913909183, "grad_norm": 1.5377811193466187, "learning_rate": 0.00019527763807434878, "loss": 1.6465, "step": 875 }, { "epoch": 0.12548345509239364, "grad_norm": 1.4994487762451172, "learning_rate": 0.00019526353808033825, "loss": 1.7088, "step": 876 }, { "epoch": 0.12562670104569545, "grad_norm": 1.1898484230041504, "learning_rate": 0.0001952494175784454, "loss": 1.441, "step": 877 }, { "epoch": 0.12576994699899727, "grad_norm": 1.889299988746643, "learning_rate": 0.00019523527657171, "loss": 1.5037, "step": 878 }, { "epoch": 0.1259131929522991, "grad_norm": 1.0025755167007446, "learning_rate": 0.00019522111506317625, "loss": 1.5566, "step": 879 }, { "epoch": 0.12605643890560092, "grad_norm": 1.5120162963867188, "learning_rate": 0.00019520693305589282, "loss": 1.4998, "step": 880 }, { "epoch": 0.12619968485890273, "grad_norm": 1.5454044342041016, "learning_rate": 0.00019519273055291266, "loss": 1.5053, "step": 881 }, { "epoch": 0.12634293081220455, "grad_norm": 1.479720950126648, "learning_rate": 0.0001951785075572933, "loss": 1.3877, "step": 882 }, { "epoch": 0.1264861767655064, "grad_norm": 1.395705223083496, "learning_rate": 0.00019516426407209652, "loss": 1.5334, "step": 883 }, { "epoch": 0.1266294227188082, "grad_norm": 1.248108148574829, "learning_rate": 0.0001951500001003887, "loss": 1.6191, "step": 884 }, { "epoch": 0.12677266867211, "grad_norm": 1.399366021156311, "learning_rate": 0.0001951357156452404, "loss": 1.6154, "step": 885 }, { "epoch": 0.12691591462541182, "grad_norm": 1.459180235862732, "learning_rate": 0.00019512141070972678, "loss": 1.7004, "step": 886 }, { "epoch": 0.12705916057871366, "grad_norm": 1.526006817817688, "learning_rate": 0.00019510708529692735, "loss": 1.448, "step": 887 }, { "epoch": 0.12720240653201548, "grad_norm": 1.6083557605743408, "learning_rate": 0.00019509273940992596, "loss": 1.49, "step": 888 }, { "epoch": 0.1273456524853173, "grad_norm": 1.349533200263977, "learning_rate": 0.00019507837305181096, "loss": 1.5824, "step": 889 }, { "epoch": 0.1274888984386191, "grad_norm": 1.4738569259643555, "learning_rate": 0.00019506398622567509, "loss": 1.6849, "step": 890 }, { "epoch": 0.12763214439192092, "grad_norm": 1.4363090991973877, "learning_rate": 0.00019504957893461545, "loss": 1.4545, "step": 891 }, { "epoch": 0.12777539034522276, "grad_norm": 1.1765127182006836, "learning_rate": 0.00019503515118173353, "loss": 1.5411, "step": 892 }, { "epoch": 0.12791863629852457, "grad_norm": 1.5619832277297974, "learning_rate": 0.00019502070297013538, "loss": 1.6108, "step": 893 }, { "epoch": 0.12806188225182638, "grad_norm": 1.396733045578003, "learning_rate": 0.0001950062343029312, "loss": 1.627, "step": 894 }, { "epoch": 0.1282051282051282, "grad_norm": 1.3025591373443604, "learning_rate": 0.00019499174518323588, "loss": 1.67, "step": 895 }, { "epoch": 0.12834837415843003, "grad_norm": 1.5245089530944824, "learning_rate": 0.0001949772356141685, "loss": 1.5114, "step": 896 }, { "epoch": 0.12849162011173185, "grad_norm": 1.7307344675064087, "learning_rate": 0.0001949627055988526, "loss": 1.469, "step": 897 }, { "epoch": 0.12863486606503366, "grad_norm": 1.5148040056228638, "learning_rate": 0.00019494815514041613, "loss": 1.4896, "step": 898 }, { "epoch": 0.12877811201833547, "grad_norm": 1.3124440908432007, "learning_rate": 0.00019493358424199148, "loss": 1.6682, "step": 899 }, { "epoch": 0.1289213579716373, "grad_norm": 1.2774840593338013, "learning_rate": 0.00019491899290671535, "loss": 1.7009, "step": 900 }, { "epoch": 0.12906460392493913, "grad_norm": 1.3566539287567139, "learning_rate": 0.00019490438113772896, "loss": 1.6027, "step": 901 }, { "epoch": 0.12920784987824094, "grad_norm": 1.3069580793380737, "learning_rate": 0.00019488974893817784, "loss": 1.5501, "step": 902 }, { "epoch": 0.12935109583154275, "grad_norm": 1.5154794454574585, "learning_rate": 0.00019487509631121192, "loss": 1.7287, "step": 903 }, { "epoch": 0.1294943417848446, "grad_norm": 1.51203191280365, "learning_rate": 0.00019486042325998556, "loss": 1.4113, "step": 904 }, { "epoch": 0.1296375877381464, "grad_norm": 1.2378709316253662, "learning_rate": 0.0001948457297876575, "loss": 1.443, "step": 905 }, { "epoch": 0.12978083369144822, "grad_norm": 1.6402249336242676, "learning_rate": 0.00019483101589739084, "loss": 1.5459, "step": 906 }, { "epoch": 0.12992407964475003, "grad_norm": 1.5180134773254395, "learning_rate": 0.00019481628159235322, "loss": 1.553, "step": 907 }, { "epoch": 0.13006732559805184, "grad_norm": 1.2810626029968262, "learning_rate": 0.0001948015268757165, "loss": 1.6266, "step": 908 }, { "epoch": 0.13021057155135368, "grad_norm": 1.0726395845413208, "learning_rate": 0.00019478675175065702, "loss": 1.7729, "step": 909 }, { "epoch": 0.1303538175046555, "grad_norm": 1.4035894870758057, "learning_rate": 0.0001947719562203555, "loss": 1.5281, "step": 910 }, { "epoch": 0.1304970634579573, "grad_norm": 1.4306449890136719, "learning_rate": 0.00019475714028799703, "loss": 1.517, "step": 911 }, { "epoch": 0.13064030941125912, "grad_norm": 1.2066540718078613, "learning_rate": 0.0001947423039567711, "loss": 1.6117, "step": 912 }, { "epoch": 0.13078355536456096, "grad_norm": 1.2805730104446411, "learning_rate": 0.0001947274472298717, "loss": 1.6499, "step": 913 }, { "epoch": 0.13092680131786277, "grad_norm": 1.3742079734802246, "learning_rate": 0.00019471257011049702, "loss": 1.5715, "step": 914 }, { "epoch": 0.13107004727116459, "grad_norm": 1.5260566473007202, "learning_rate": 0.00019469767260184975, "loss": 1.5409, "step": 915 }, { "epoch": 0.1312132932244664, "grad_norm": 1.415353775024414, "learning_rate": 0.000194682754707137, "loss": 1.5905, "step": 916 }, { "epoch": 0.13135653917776824, "grad_norm": 1.3228139877319336, "learning_rate": 0.0001946678164295702, "loss": 1.4905, "step": 917 }, { "epoch": 0.13149978513107005, "grad_norm": 1.3434737920761108, "learning_rate": 0.0001946528577723652, "loss": 1.5738, "step": 918 }, { "epoch": 0.13164303108437186, "grad_norm": 1.454965591430664, "learning_rate": 0.00019463787873874217, "loss": 1.5264, "step": 919 }, { "epoch": 0.13178627703767368, "grad_norm": 1.6001193523406982, "learning_rate": 0.0001946228793319258, "loss": 1.7512, "step": 920 }, { "epoch": 0.13192952299097552, "grad_norm": 1.5035537481307983, "learning_rate": 0.00019460785955514504, "loss": 1.5072, "step": 921 }, { "epoch": 0.13207276894427733, "grad_norm": 1.1897765398025513, "learning_rate": 0.00019459281941163332, "loss": 1.498, "step": 922 }, { "epoch": 0.13221601489757914, "grad_norm": 1.2392112016677856, "learning_rate": 0.00019457775890462838, "loss": 1.3123, "step": 923 }, { "epoch": 0.13235926085088096, "grad_norm": 1.2779078483581543, "learning_rate": 0.00019456267803737243, "loss": 1.6161, "step": 924 }, { "epoch": 0.13250250680418277, "grad_norm": 1.4699326753616333, "learning_rate": 0.0001945475768131119, "loss": 1.6932, "step": 925 }, { "epoch": 0.1326457527574846, "grad_norm": 1.2128102779388428, "learning_rate": 0.00019453245523509777, "loss": 1.6362, "step": 926 }, { "epoch": 0.13278899871078642, "grad_norm": 1.7061197757720947, "learning_rate": 0.0001945173133065854, "loss": 1.57, "step": 927 }, { "epoch": 0.13293224466408823, "grad_norm": 1.182349681854248, "learning_rate": 0.00019450215103083437, "loss": 1.7028, "step": 928 }, { "epoch": 0.13307549061739005, "grad_norm": 1.5013049840927124, "learning_rate": 0.0001944869684111088, "loss": 1.4712, "step": 929 }, { "epoch": 0.1332187365706919, "grad_norm": 1.7043782472610474, "learning_rate": 0.00019447176545067711, "loss": 1.6052, "step": 930 }, { "epoch": 0.1333619825239937, "grad_norm": 1.298484206199646, "learning_rate": 0.00019445654215281214, "loss": 1.4566, "step": 931 }, { "epoch": 0.1335052284772955, "grad_norm": 1.3367338180541992, "learning_rate": 0.0001944412985207911, "loss": 1.5929, "step": 932 }, { "epoch": 0.13364847443059732, "grad_norm": 1.4428350925445557, "learning_rate": 0.0001944260345578955, "loss": 1.5432, "step": 933 }, { "epoch": 0.13379172038389917, "grad_norm": 1.440341830253601, "learning_rate": 0.00019441075026741138, "loss": 1.6273, "step": 934 }, { "epoch": 0.13393496633720098, "grad_norm": 1.4038457870483398, "learning_rate": 0.00019439544565262904, "loss": 1.6491, "step": 935 }, { "epoch": 0.1340782122905028, "grad_norm": 1.4589582681655884, "learning_rate": 0.00019438012071684314, "loss": 1.6027, "step": 936 }, { "epoch": 0.1342214582438046, "grad_norm": 1.6593842506408691, "learning_rate": 0.0001943647754633528, "loss": 1.5932, "step": 937 }, { "epoch": 0.13436470419710644, "grad_norm": 1.4873721599578857, "learning_rate": 0.0001943494098954615, "loss": 1.6837, "step": 938 }, { "epoch": 0.13450795015040826, "grad_norm": 1.2693896293640137, "learning_rate": 0.000194334024016477, "loss": 1.613, "step": 939 }, { "epoch": 0.13465119610371007, "grad_norm": 1.2949308156967163, "learning_rate": 0.00019431861782971156, "loss": 1.6487, "step": 940 }, { "epoch": 0.13479444205701188, "grad_norm": 1.2915406227111816, "learning_rate": 0.0001943031913384817, "loss": 1.6778, "step": 941 }, { "epoch": 0.13493768801031372, "grad_norm": 1.363283395767212, "learning_rate": 0.00019428774454610843, "loss": 1.6879, "step": 942 }, { "epoch": 0.13508093396361553, "grad_norm": 1.5493043661117554, "learning_rate": 0.000194272277455917, "loss": 1.5816, "step": 943 }, { "epoch": 0.13522417991691735, "grad_norm": 1.6780341863632202, "learning_rate": 0.0001942567900712371, "loss": 1.7223, "step": 944 }, { "epoch": 0.13536742587021916, "grad_norm": 1.1107063293457031, "learning_rate": 0.00019424128239540277, "loss": 1.5271, "step": 945 }, { "epoch": 0.13551067182352097, "grad_norm": 1.5544624328613281, "learning_rate": 0.0001942257544317525, "loss": 1.6354, "step": 946 }, { "epoch": 0.1356539177768228, "grad_norm": 1.163934350013733, "learning_rate": 0.00019421020618362898, "loss": 1.8291, "step": 947 }, { "epoch": 0.13579716373012463, "grad_norm": 1.355676531791687, "learning_rate": 0.00019419463765437943, "loss": 1.6046, "step": 948 }, { "epoch": 0.13594040968342644, "grad_norm": 1.3692798614501953, "learning_rate": 0.00019417904884735533, "loss": 1.551, "step": 949 }, { "epoch": 0.13608365563672825, "grad_norm": 1.3626713752746582, "learning_rate": 0.00019416343976591261, "loss": 1.4371, "step": 950 }, { "epoch": 0.1362269015900301, "grad_norm": 1.5438525676727295, "learning_rate": 0.0001941478104134115, "loss": 1.4228, "step": 951 }, { "epoch": 0.1363701475433319, "grad_norm": 1.2169398069381714, "learning_rate": 0.00019413216079321654, "loss": 1.6035, "step": 952 }, { "epoch": 0.13651339349663372, "grad_norm": 1.4688122272491455, "learning_rate": 0.00019411649090869684, "loss": 1.5359, "step": 953 }, { "epoch": 0.13665663944993553, "grad_norm": 1.2406110763549805, "learning_rate": 0.00019410080076322564, "loss": 1.5632, "step": 954 }, { "epoch": 0.13679988540323737, "grad_norm": 1.519088625907898, "learning_rate": 0.00019408509036018066, "loss": 1.4936, "step": 955 }, { "epoch": 0.13694313135653918, "grad_norm": 1.0677958726882935, "learning_rate": 0.00019406935970294397, "loss": 1.645, "step": 956 }, { "epoch": 0.137086377309841, "grad_norm": 1.0214921236038208, "learning_rate": 0.00019405360879490202, "loss": 1.5606, "step": 957 }, { "epoch": 0.1372296232631428, "grad_norm": 1.314568281173706, "learning_rate": 0.00019403783763944556, "loss": 1.6634, "step": 958 }, { "epoch": 0.13737286921644465, "grad_norm": 1.625647783279419, "learning_rate": 0.0001940220462399697, "loss": 1.6296, "step": 959 }, { "epoch": 0.13751611516974646, "grad_norm": 1.4990005493164062, "learning_rate": 0.000194006234599874, "loss": 1.6438, "step": 960 }, { "epoch": 0.13765936112304827, "grad_norm": 1.4645360708236694, "learning_rate": 0.00019399040272256225, "loss": 1.4934, "step": 961 }, { "epoch": 0.1378026070763501, "grad_norm": 1.3728829622268677, "learning_rate": 0.00019397455061144272, "loss": 1.545, "step": 962 }, { "epoch": 0.1379458530296519, "grad_norm": 1.4249588251113892, "learning_rate": 0.00019395867826992795, "loss": 1.535, "step": 963 }, { "epoch": 0.13808909898295374, "grad_norm": 1.1676795482635498, "learning_rate": 0.00019394278570143488, "loss": 1.7477, "step": 964 }, { "epoch": 0.13823234493625555, "grad_norm": 1.1489231586456299, "learning_rate": 0.00019392687290938475, "loss": 1.6569, "step": 965 }, { "epoch": 0.13837559088955736, "grad_norm": 1.5753334760665894, "learning_rate": 0.00019391093989720322, "loss": 1.4576, "step": 966 }, { "epoch": 0.13851883684285918, "grad_norm": 1.1450971364974976, "learning_rate": 0.00019389498666832025, "loss": 1.637, "step": 967 }, { "epoch": 0.13866208279616102, "grad_norm": 1.655789852142334, "learning_rate": 0.0001938790132261702, "loss": 1.6492, "step": 968 }, { "epoch": 0.13880532874946283, "grad_norm": 1.4353446960449219, "learning_rate": 0.00019386301957419172, "loss": 1.5714, "step": 969 }, { "epoch": 0.13894857470276464, "grad_norm": 1.4906525611877441, "learning_rate": 0.00019384700571582793, "loss": 1.4296, "step": 970 }, { "epoch": 0.13909182065606646, "grad_norm": 1.2638425827026367, "learning_rate": 0.00019383097165452613, "loss": 1.68, "step": 971 }, { "epoch": 0.1392350666093683, "grad_norm": 1.3439027070999146, "learning_rate": 0.0001938149173937381, "loss": 1.7358, "step": 972 }, { "epoch": 0.1393783125626701, "grad_norm": 1.5346641540527344, "learning_rate": 0.0001937988429369199, "loss": 1.6117, "step": 973 }, { "epoch": 0.13952155851597192, "grad_norm": 1.3425935506820679, "learning_rate": 0.000193782748287532, "loss": 1.7291, "step": 974 }, { "epoch": 0.13966480446927373, "grad_norm": 1.5102351903915405, "learning_rate": 0.00019376663344903913, "loss": 1.6585, "step": 975 }, { "epoch": 0.13980805042257557, "grad_norm": 1.4603413343429565, "learning_rate": 0.00019375049842491047, "loss": 1.4841, "step": 976 }, { "epoch": 0.1399512963758774, "grad_norm": 1.1883057355880737, "learning_rate": 0.00019373434321861942, "loss": 1.588, "step": 977 }, { "epoch": 0.1400945423291792, "grad_norm": 1.2276684045791626, "learning_rate": 0.00019371816783364388, "loss": 1.6725, "step": 978 }, { "epoch": 0.140237788282481, "grad_norm": 1.2830169200897217, "learning_rate": 0.00019370197227346596, "loss": 1.5397, "step": 979 }, { "epoch": 0.14038103423578283, "grad_norm": 1.1476396322250366, "learning_rate": 0.00019368575654157217, "loss": 1.5355, "step": 980 }, { "epoch": 0.14052428018908467, "grad_norm": 1.2300249338150024, "learning_rate": 0.00019366952064145334, "loss": 1.5267, "step": 981 }, { "epoch": 0.14066752614238648, "grad_norm": 1.5411263704299927, "learning_rate": 0.00019365326457660472, "loss": 1.5968, "step": 982 }, { "epoch": 0.1408107720956883, "grad_norm": 1.390707015991211, "learning_rate": 0.00019363698835052576, "loss": 1.5561, "step": 983 }, { "epoch": 0.1409540180489901, "grad_norm": 1.2429877519607544, "learning_rate": 0.00019362069196672037, "loss": 1.6717, "step": 984 }, { "epoch": 0.14109726400229194, "grad_norm": 1.4149080514907837, "learning_rate": 0.00019360437542869676, "loss": 1.5962, "step": 985 }, { "epoch": 0.14124050995559376, "grad_norm": 1.152044415473938, "learning_rate": 0.00019358803873996747, "loss": 1.5248, "step": 986 }, { "epoch": 0.14138375590889557, "grad_norm": 1.3117223978042603, "learning_rate": 0.00019357168190404936, "loss": 1.4728, "step": 987 }, { "epoch": 0.14152700186219738, "grad_norm": 1.118538737297058, "learning_rate": 0.0001935553049244637, "loss": 1.6256, "step": 988 }, { "epoch": 0.14167024781549922, "grad_norm": 1.24899160861969, "learning_rate": 0.00019353890780473602, "loss": 1.6468, "step": 989 }, { "epoch": 0.14181349376880104, "grad_norm": 1.4963958263397217, "learning_rate": 0.00019352249054839624, "loss": 1.4806, "step": 990 }, { "epoch": 0.14195673972210285, "grad_norm": 1.4605729579925537, "learning_rate": 0.00019350605315897852, "loss": 1.5374, "step": 991 }, { "epoch": 0.14209998567540466, "grad_norm": 1.2262187004089355, "learning_rate": 0.00019348959564002152, "loss": 1.4389, "step": 992 }, { "epoch": 0.1422432316287065, "grad_norm": 1.3125232458114624, "learning_rate": 0.00019347311799506803, "loss": 1.6267, "step": 993 }, { "epoch": 0.14238647758200831, "grad_norm": 1.4034234285354614, "learning_rate": 0.0001934566202276654, "loss": 1.7271, "step": 994 }, { "epoch": 0.14252972353531013, "grad_norm": 1.2048908472061157, "learning_rate": 0.00019344010234136508, "loss": 1.5188, "step": 995 }, { "epoch": 0.14267296948861194, "grad_norm": 1.2334035634994507, "learning_rate": 0.00019342356433972303, "loss": 1.5066, "step": 996 }, { "epoch": 0.14281621544191378, "grad_norm": 1.190976619720459, "learning_rate": 0.00019340700622629946, "loss": 1.7507, "step": 997 }, { "epoch": 0.1429594613952156, "grad_norm": 1.717379093170166, "learning_rate": 0.00019339042800465889, "loss": 1.5083, "step": 998 }, { "epoch": 0.1431027073485174, "grad_norm": 1.454601526260376, "learning_rate": 0.00019337382967837024, "loss": 1.5076, "step": 999 }, { "epoch": 0.14324595330181922, "grad_norm": 1.2652497291564941, "learning_rate": 0.0001933572112510067, "loss": 1.4765, "step": 1000 }, { "epoch": 0.14338919925512103, "grad_norm": 1.0453784465789795, "learning_rate": 0.0001933405727261458, "loss": 1.6082, "step": 1001 }, { "epoch": 0.14353244520842287, "grad_norm": 1.3370368480682373, "learning_rate": 0.0001933239141073694, "loss": 1.3811, "step": 1002 }, { "epoch": 0.14367569116172468, "grad_norm": 1.414880394935608, "learning_rate": 0.00019330723539826375, "loss": 1.4487, "step": 1003 }, { "epoch": 0.1438189371150265, "grad_norm": 1.3539701700210571, "learning_rate": 0.00019329053660241928, "loss": 1.5082, "step": 1004 }, { "epoch": 0.1439621830683283, "grad_norm": 1.5669071674346924, "learning_rate": 0.00019327381772343087, "loss": 1.6623, "step": 1005 }, { "epoch": 0.14410542902163015, "grad_norm": 1.0910766124725342, "learning_rate": 0.00019325707876489766, "loss": 1.5005, "step": 1006 }, { "epoch": 0.14424867497493196, "grad_norm": 1.6274842023849487, "learning_rate": 0.00019324031973042317, "loss": 1.4766, "step": 1007 }, { "epoch": 0.14439192092823377, "grad_norm": 1.2391437292099, "learning_rate": 0.00019322354062361517, "loss": 1.5682, "step": 1008 }, { "epoch": 0.1445351668815356, "grad_norm": 1.514203667640686, "learning_rate": 0.0001932067414480858, "loss": 1.6752, "step": 1009 }, { "epoch": 0.14467841283483743, "grad_norm": 1.1302251815795898, "learning_rate": 0.00019318992220745149, "loss": 1.7205, "step": 1010 }, { "epoch": 0.14482165878813924, "grad_norm": 1.638452172279358, "learning_rate": 0.00019317308290533306, "loss": 1.5628, "step": 1011 }, { "epoch": 0.14496490474144105, "grad_norm": 1.5972185134887695, "learning_rate": 0.00019315622354535553, "loss": 1.2919, "step": 1012 }, { "epoch": 0.14510815069474287, "grad_norm": 1.8035101890563965, "learning_rate": 0.00019313934413114832, "loss": 1.4733, "step": 1013 }, { "epoch": 0.1452513966480447, "grad_norm": 1.3459478616714478, "learning_rate": 0.00019312244466634517, "loss": 1.5086, "step": 1014 }, { "epoch": 0.14539464260134652, "grad_norm": 1.4896929264068604, "learning_rate": 0.0001931055251545841, "loss": 1.7146, "step": 1015 }, { "epoch": 0.14553788855464833, "grad_norm": 1.537964105606079, "learning_rate": 0.00019308858559950748, "loss": 1.6314, "step": 1016 }, { "epoch": 0.14568113450795014, "grad_norm": 1.323756217956543, "learning_rate": 0.00019307162600476195, "loss": 1.5025, "step": 1017 }, { "epoch": 0.14582438046125196, "grad_norm": 1.3886237144470215, "learning_rate": 0.00019305464637399853, "loss": 1.4745, "step": 1018 }, { "epoch": 0.1459676264145538, "grad_norm": 1.4149054288864136, "learning_rate": 0.00019303764671087245, "loss": 1.5865, "step": 1019 }, { "epoch": 0.1461108723678556, "grad_norm": 1.5493627786636353, "learning_rate": 0.0001930206270190434, "loss": 1.5541, "step": 1020 }, { "epoch": 0.14625411832115742, "grad_norm": 1.4834001064300537, "learning_rate": 0.00019300358730217526, "loss": 1.4414, "step": 1021 }, { "epoch": 0.14639736427445924, "grad_norm": 1.583128571510315, "learning_rate": 0.0001929865275639362, "loss": 1.5788, "step": 1022 }, { "epoch": 0.14654061022776108, "grad_norm": 1.4072614908218384, "learning_rate": 0.00019296944780799885, "loss": 1.5639, "step": 1023 }, { "epoch": 0.1466838561810629, "grad_norm": 1.1666544675827026, "learning_rate": 0.00019295234803804004, "loss": 1.57, "step": 1024 }, { "epoch": 0.1468271021343647, "grad_norm": 1.5429679155349731, "learning_rate": 0.0001929352282577409, "loss": 1.551, "step": 1025 }, { "epoch": 0.1469703480876665, "grad_norm": 1.1465579271316528, "learning_rate": 0.0001929180884707869, "loss": 1.6996, "step": 1026 }, { "epoch": 0.14711359404096835, "grad_norm": 1.430687427520752, "learning_rate": 0.0001929009286808678, "loss": 1.5423, "step": 1027 }, { "epoch": 0.14725683999427017, "grad_norm": 1.2124978303909302, "learning_rate": 0.0001928837488916777, "loss": 1.4928, "step": 1028 }, { "epoch": 0.14740008594757198, "grad_norm": 1.6632754802703857, "learning_rate": 0.00019286654910691503, "loss": 1.7412, "step": 1029 }, { "epoch": 0.1475433319008738, "grad_norm": 1.1591392755508423, "learning_rate": 0.0001928493293302824, "loss": 1.609, "step": 1030 }, { "epoch": 0.14768657785417563, "grad_norm": 1.2420518398284912, "learning_rate": 0.0001928320895654868, "loss": 1.5294, "step": 1031 }, { "epoch": 0.14782982380747745, "grad_norm": 1.6039559841156006, "learning_rate": 0.00019281482981623957, "loss": 1.6582, "step": 1032 }, { "epoch": 0.14797306976077926, "grad_norm": 1.3535301685333252, "learning_rate": 0.0001927975500862563, "loss": 1.6827, "step": 1033 }, { "epoch": 0.14811631571408107, "grad_norm": 1.2901084423065186, "learning_rate": 0.00019278025037925689, "loss": 1.3468, "step": 1034 }, { "epoch": 0.14825956166738288, "grad_norm": 1.0611035823822021, "learning_rate": 0.00019276293069896548, "loss": 1.7171, "step": 1035 }, { "epoch": 0.14840280762068472, "grad_norm": 1.4382847547531128, "learning_rate": 0.00019274559104911067, "loss": 1.656, "step": 1036 }, { "epoch": 0.14854605357398654, "grad_norm": 1.5861812829971313, "learning_rate": 0.0001927282314334252, "loss": 1.6, "step": 1037 }, { "epoch": 0.14868929952728835, "grad_norm": 1.4645975828170776, "learning_rate": 0.00019271085185564615, "loss": 1.5686, "step": 1038 }, { "epoch": 0.14883254548059016, "grad_norm": 1.5574449300765991, "learning_rate": 0.00019269345231951493, "loss": 1.6133, "step": 1039 }, { "epoch": 0.148975791433892, "grad_norm": 1.3897409439086914, "learning_rate": 0.00019267603282877724, "loss": 1.4349, "step": 1040 }, { "epoch": 0.14911903738719381, "grad_norm": 1.7466847896575928, "learning_rate": 0.00019265859338718304, "loss": 1.5295, "step": 1041 }, { "epoch": 0.14926228334049563, "grad_norm": 1.3798412084579468, "learning_rate": 0.00019264113399848664, "loss": 1.543, "step": 1042 }, { "epoch": 0.14940552929379744, "grad_norm": 1.1986595392227173, "learning_rate": 0.0001926236546664466, "loss": 1.6707, "step": 1043 }, { "epoch": 0.14954877524709928, "grad_norm": 1.080202579498291, "learning_rate": 0.00019260615539482584, "loss": 1.6254, "step": 1044 }, { "epoch": 0.1496920212004011, "grad_norm": 1.2281359434127808, "learning_rate": 0.0001925886361873914, "loss": 1.6682, "step": 1045 }, { "epoch": 0.1498352671537029, "grad_norm": 1.5164756774902344, "learning_rate": 0.00019257109704791484, "loss": 1.6561, "step": 1046 }, { "epoch": 0.14997851310700472, "grad_norm": 1.232721209526062, "learning_rate": 0.00019255353798017184, "loss": 1.7319, "step": 1047 }, { "epoch": 0.15012175906030656, "grad_norm": 1.3041349649429321, "learning_rate": 0.00019253595898794247, "loss": 1.449, "step": 1048 }, { "epoch": 0.15026500501360837, "grad_norm": 1.3203157186508179, "learning_rate": 0.00019251836007501102, "loss": 1.5185, "step": 1049 }, { "epoch": 0.15040825096691018, "grad_norm": 1.2182791233062744, "learning_rate": 0.00019250074124516618, "loss": 1.6029, "step": 1050 }, { "epoch": 0.150551496920212, "grad_norm": 1.092157006263733, "learning_rate": 0.00019248310250220073, "loss": 1.4837, "step": 1051 }, { "epoch": 0.1506947428735138, "grad_norm": 1.3514152765274048, "learning_rate": 0.0001924654438499119, "loss": 1.6147, "step": 1052 }, { "epoch": 0.15083798882681565, "grad_norm": 1.3528683185577393, "learning_rate": 0.00019244776529210122, "loss": 1.3918, "step": 1053 }, { "epoch": 0.15098123478011746, "grad_norm": 1.3136584758758545, "learning_rate": 0.00019243006683257439, "loss": 1.5979, "step": 1054 }, { "epoch": 0.15112448073341928, "grad_norm": 1.66302490234375, "learning_rate": 0.00019241234847514144, "loss": 1.591, "step": 1055 }, { "epoch": 0.1512677266867211, "grad_norm": 1.2907557487487793, "learning_rate": 0.00019239461022361676, "loss": 1.5207, "step": 1056 }, { "epoch": 0.15141097264002293, "grad_norm": 1.7057480812072754, "learning_rate": 0.00019237685208181886, "loss": 1.3596, "step": 1057 }, { "epoch": 0.15155421859332474, "grad_norm": 1.4090209007263184, "learning_rate": 0.0001923590740535707, "loss": 1.8182, "step": 1058 }, { "epoch": 0.15169746454662655, "grad_norm": 1.323533058166504, "learning_rate": 0.00019234127614269943, "loss": 1.6263, "step": 1059 }, { "epoch": 0.15184071049992837, "grad_norm": 1.1065683364868164, "learning_rate": 0.00019232345835303648, "loss": 1.5436, "step": 1060 }, { "epoch": 0.1519839564532302, "grad_norm": 1.2325598001480103, "learning_rate": 0.0001923056206884176, "loss": 1.4932, "step": 1061 }, { "epoch": 0.15212720240653202, "grad_norm": 1.4053757190704346, "learning_rate": 0.0001922877631526828, "loss": 1.7089, "step": 1062 }, { "epoch": 0.15227044835983383, "grad_norm": 1.3989908695220947, "learning_rate": 0.0001922698857496764, "loss": 1.4073, "step": 1063 }, { "epoch": 0.15241369431313564, "grad_norm": 1.4045591354370117, "learning_rate": 0.0001922519884832469, "loss": 1.5376, "step": 1064 }, { "epoch": 0.15255694026643749, "grad_norm": 1.3122562170028687, "learning_rate": 0.00019223407135724713, "loss": 1.4856, "step": 1065 }, { "epoch": 0.1527001862197393, "grad_norm": 1.3495731353759766, "learning_rate": 0.00019221613437553426, "loss": 1.5055, "step": 1066 }, { "epoch": 0.1528434321730411, "grad_norm": 1.4818278551101685, "learning_rate": 0.00019219817754196966, "loss": 1.6641, "step": 1067 }, { "epoch": 0.15298667812634292, "grad_norm": 1.2352901697158813, "learning_rate": 0.00019218020086041898, "loss": 1.6724, "step": 1068 }, { "epoch": 0.15312992407964476, "grad_norm": 1.309185266494751, "learning_rate": 0.00019216220433475214, "loss": 1.5106, "step": 1069 }, { "epoch": 0.15327317003294658, "grad_norm": 1.2199681997299194, "learning_rate": 0.0001921441879688434, "loss": 1.6603, "step": 1070 }, { "epoch": 0.1534164159862484, "grad_norm": 1.2713277339935303, "learning_rate": 0.00019212615176657116, "loss": 1.4, "step": 1071 }, { "epoch": 0.1535596619395502, "grad_norm": 1.369075894355774, "learning_rate": 0.00019210809573181825, "loss": 1.5886, "step": 1072 }, { "epoch": 0.15370290789285201, "grad_norm": 1.1621137857437134, "learning_rate": 0.00019209001986847163, "loss": 1.5208, "step": 1073 }, { "epoch": 0.15384615384615385, "grad_norm": 1.0331898927688599, "learning_rate": 0.00019207192418042266, "loss": 1.4522, "step": 1074 }, { "epoch": 0.15398939979945567, "grad_norm": 1.3525487184524536, "learning_rate": 0.00019205380867156677, "loss": 1.6604, "step": 1075 }, { "epoch": 0.15413264575275748, "grad_norm": 1.1547057628631592, "learning_rate": 0.0001920356733458039, "loss": 1.8436, "step": 1076 }, { "epoch": 0.1542758917060593, "grad_norm": 1.2889951467514038, "learning_rate": 0.00019201751820703807, "loss": 1.5426, "step": 1077 }, { "epoch": 0.15441913765936113, "grad_norm": 1.2044836282730103, "learning_rate": 0.00019199934325917766, "loss": 1.5849, "step": 1078 }, { "epoch": 0.15456238361266295, "grad_norm": 1.6906284093856812, "learning_rate": 0.00019198114850613524, "loss": 1.4109, "step": 1079 }, { "epoch": 0.15470562956596476, "grad_norm": 1.5398533344268799, "learning_rate": 0.00019196293395182777, "loss": 1.5977, "step": 1080 }, { "epoch": 0.15484887551926657, "grad_norm": 1.1078940629959106, "learning_rate": 0.0001919446996001763, "loss": 1.7665, "step": 1081 }, { "epoch": 0.1549921214725684, "grad_norm": 1.2386343479156494, "learning_rate": 0.00019192644545510635, "loss": 1.7485, "step": 1082 }, { "epoch": 0.15513536742587022, "grad_norm": 1.2610774040222168, "learning_rate": 0.0001919081715205475, "loss": 1.7379, "step": 1083 }, { "epoch": 0.15527861337917204, "grad_norm": 1.390198826789856, "learning_rate": 0.00019188987780043365, "loss": 1.4863, "step": 1084 }, { "epoch": 0.15542185933247385, "grad_norm": 1.479318380355835, "learning_rate": 0.00019187156429870307, "loss": 1.5026, "step": 1085 }, { "epoch": 0.1555651052857757, "grad_norm": 1.1878783702850342, "learning_rate": 0.00019185323101929814, "loss": 1.487, "step": 1086 }, { "epoch": 0.1557083512390775, "grad_norm": 1.4197934865951538, "learning_rate": 0.0001918348779661656, "loss": 1.4418, "step": 1087 }, { "epoch": 0.15585159719237932, "grad_norm": 1.3864970207214355, "learning_rate": 0.00019181650514325641, "loss": 1.5701, "step": 1088 }, { "epoch": 0.15599484314568113, "grad_norm": 1.2309435606002808, "learning_rate": 0.00019179811255452575, "loss": 1.5973, "step": 1089 }, { "epoch": 0.15613808909898294, "grad_norm": 1.1256530284881592, "learning_rate": 0.00019177970020393313, "loss": 1.6458, "step": 1090 }, { "epoch": 0.15628133505228478, "grad_norm": 1.2956477403640747, "learning_rate": 0.00019176126809544223, "loss": 1.5702, "step": 1091 }, { "epoch": 0.1564245810055866, "grad_norm": 1.2641352415084839, "learning_rate": 0.00019174281623302107, "loss": 1.5626, "step": 1092 }, { "epoch": 0.1565678269588884, "grad_norm": 2.111788272857666, "learning_rate": 0.0001917243446206418, "loss": 1.5446, "step": 1093 }, { "epoch": 0.15671107291219022, "grad_norm": 1.5006740093231201, "learning_rate": 0.00019170585326228103, "loss": 1.4848, "step": 1094 }, { "epoch": 0.15685431886549206, "grad_norm": 1.1169055700302124, "learning_rate": 0.00019168734216191936, "loss": 1.5926, "step": 1095 }, { "epoch": 0.15699756481879387, "grad_norm": 1.3934125900268555, "learning_rate": 0.0001916688113235419, "loss": 1.467, "step": 1096 }, { "epoch": 0.15714081077209568, "grad_norm": 1.2894898653030396, "learning_rate": 0.00019165026075113777, "loss": 1.5184, "step": 1097 }, { "epoch": 0.1572840567253975, "grad_norm": 1.2575918436050415, "learning_rate": 0.0001916316904487005, "loss": 1.4199, "step": 1098 }, { "epoch": 0.15742730267869934, "grad_norm": 1.2589964866638184, "learning_rate": 0.00019161310042022778, "loss": 1.5166, "step": 1099 }, { "epoch": 0.15757054863200115, "grad_norm": 1.171149730682373, "learning_rate": 0.00019159449066972163, "loss": 1.6936, "step": 1100 }, { "epoch": 0.15771379458530296, "grad_norm": 1.1980068683624268, "learning_rate": 0.00019157586120118828, "loss": 1.6043, "step": 1101 }, { "epoch": 0.15785704053860478, "grad_norm": 1.1674002408981323, "learning_rate": 0.00019155721201863816, "loss": 1.5042, "step": 1102 }, { "epoch": 0.15800028649190662, "grad_norm": 2.0021567344665527, "learning_rate": 0.00019153854312608593, "loss": 1.5049, "step": 1103 }, { "epoch": 0.15814353244520843, "grad_norm": 1.520702600479126, "learning_rate": 0.00019151985452755063, "loss": 1.5655, "step": 1104 }, { "epoch": 0.15828677839851024, "grad_norm": 1.4473119974136353, "learning_rate": 0.00019150114622705543, "loss": 1.7629, "step": 1105 }, { "epoch": 0.15843002435181205, "grad_norm": 1.2951511144638062, "learning_rate": 0.0001914824182286277, "loss": 1.4727, "step": 1106 }, { "epoch": 0.15857327030511387, "grad_norm": 1.4768249988555908, "learning_rate": 0.00019146367053629918, "loss": 1.6218, "step": 1107 }, { "epoch": 0.1587165162584157, "grad_norm": 1.0381557941436768, "learning_rate": 0.0001914449031541058, "loss": 1.4425, "step": 1108 }, { "epoch": 0.15885976221171752, "grad_norm": 1.259022831916809, "learning_rate": 0.00019142611608608765, "loss": 1.4646, "step": 1109 }, { "epoch": 0.15900300816501933, "grad_norm": 1.2443740367889404, "learning_rate": 0.00019140730933628916, "loss": 1.5468, "step": 1110 }, { "epoch": 0.15914625411832115, "grad_norm": 1.210586428642273, "learning_rate": 0.00019138848290875898, "loss": 1.5649, "step": 1111 }, { "epoch": 0.15928950007162299, "grad_norm": 1.402271032333374, "learning_rate": 0.00019136963680754988, "loss": 1.3465, "step": 1112 }, { "epoch": 0.1594327460249248, "grad_norm": 1.1137586832046509, "learning_rate": 0.00019135077103671908, "loss": 1.5549, "step": 1113 }, { "epoch": 0.1595759919782266, "grad_norm": 1.2129237651824951, "learning_rate": 0.00019133188560032784, "loss": 1.5458, "step": 1114 }, { "epoch": 0.15971923793152842, "grad_norm": 1.4060721397399902, "learning_rate": 0.00019131298050244173, "loss": 1.5444, "step": 1115 }, { "epoch": 0.15986248388483026, "grad_norm": 1.139362096786499, "learning_rate": 0.00019129405574713057, "loss": 1.4979, "step": 1116 }, { "epoch": 0.16000572983813208, "grad_norm": 1.2220191955566406, "learning_rate": 0.0001912751113384684, "loss": 1.4723, "step": 1117 }, { "epoch": 0.1601489757914339, "grad_norm": 1.1440412998199463, "learning_rate": 0.00019125614728053344, "loss": 1.4987, "step": 1118 }, { "epoch": 0.1602922217447357, "grad_norm": 1.4038132429122925, "learning_rate": 0.0001912371635774082, "loss": 1.4667, "step": 1119 }, { "epoch": 0.16043546769803754, "grad_norm": 1.1797844171524048, "learning_rate": 0.00019121816023317948, "loss": 1.5144, "step": 1120 }, { "epoch": 0.16057871365133936, "grad_norm": 1.3288965225219727, "learning_rate": 0.0001911991372519381, "loss": 1.3749, "step": 1121 }, { "epoch": 0.16072195960464117, "grad_norm": 1.6327883005142212, "learning_rate": 0.0001911800946377793, "loss": 1.5428, "step": 1122 }, { "epoch": 0.16086520555794298, "grad_norm": 1.2628464698791504, "learning_rate": 0.0001911610323948025, "loss": 1.4392, "step": 1123 }, { "epoch": 0.16100845151124482, "grad_norm": 1.2259173393249512, "learning_rate": 0.0001911419505271113, "loss": 1.5197, "step": 1124 }, { "epoch": 0.16115169746454663, "grad_norm": 1.0414924621582031, "learning_rate": 0.0001911228490388136, "loss": 1.6483, "step": 1125 }, { "epoch": 0.16129494341784845, "grad_norm": 1.392253041267395, "learning_rate": 0.0001911037279340214, "loss": 1.5102, "step": 1126 }, { "epoch": 0.16143818937115026, "grad_norm": 0.9352260828018188, "learning_rate": 0.00019108458721685105, "loss": 1.7936, "step": 1127 }, { "epoch": 0.16158143532445207, "grad_norm": 1.005239486694336, "learning_rate": 0.00019106542689142306, "loss": 1.5246, "step": 1128 }, { "epoch": 0.1617246812777539, "grad_norm": 1.0168390274047852, "learning_rate": 0.0001910462469618622, "loss": 1.6308, "step": 1129 }, { "epoch": 0.16186792723105572, "grad_norm": 1.3340356349945068, "learning_rate": 0.0001910270474322974, "loss": 1.4908, "step": 1130 }, { "epoch": 0.16201117318435754, "grad_norm": 1.097332239151001, "learning_rate": 0.00019100782830686188, "loss": 1.503, "step": 1131 }, { "epoch": 0.16215441913765935, "grad_norm": 1.283360242843628, "learning_rate": 0.000190988589589693, "loss": 1.5463, "step": 1132 }, { "epoch": 0.1622976650909612, "grad_norm": 1.2783915996551514, "learning_rate": 0.00019096933128493238, "loss": 1.6493, "step": 1133 }, { "epoch": 0.162440911044263, "grad_norm": 1.088844656944275, "learning_rate": 0.0001909500533967259, "loss": 1.6921, "step": 1134 }, { "epoch": 0.16258415699756482, "grad_norm": 1.1864125728607178, "learning_rate": 0.00019093075592922358, "loss": 1.4782, "step": 1135 }, { "epoch": 0.16272740295086663, "grad_norm": 1.3389954566955566, "learning_rate": 0.0001909114388865797, "loss": 1.582, "step": 1136 }, { "epoch": 0.16287064890416847, "grad_norm": 1.2726613283157349, "learning_rate": 0.00019089210227295276, "loss": 1.5965, "step": 1137 }, { "epoch": 0.16301389485747028, "grad_norm": 1.3534932136535645, "learning_rate": 0.0001908727460925054, "loss": 1.5993, "step": 1138 }, { "epoch": 0.1631571408107721, "grad_norm": 1.1192597150802612, "learning_rate": 0.00019085337034940457, "loss": 1.2934, "step": 1139 }, { "epoch": 0.1633003867640739, "grad_norm": 1.1365140676498413, "learning_rate": 0.00019083397504782138, "loss": 1.6308, "step": 1140 }, { "epoch": 0.16344363271737575, "grad_norm": 1.239017367362976, "learning_rate": 0.00019081456019193112, "loss": 1.6364, "step": 1141 }, { "epoch": 0.16358687867067756, "grad_norm": 1.0785810947418213, "learning_rate": 0.00019079512578591337, "loss": 1.4922, "step": 1142 }, { "epoch": 0.16373012462397937, "grad_norm": 1.31537926197052, "learning_rate": 0.0001907756718339519, "loss": 1.5822, "step": 1143 }, { "epoch": 0.16387337057728119, "grad_norm": 1.128383994102478, "learning_rate": 0.0001907561983402346, "loss": 1.586, "step": 1144 }, { "epoch": 0.164016616530583, "grad_norm": 0.966109573841095, "learning_rate": 0.0001907367053089537, "loss": 1.6256, "step": 1145 }, { "epoch": 0.16415986248388484, "grad_norm": 1.2360272407531738, "learning_rate": 0.00019071719274430554, "loss": 1.5867, "step": 1146 }, { "epoch": 0.16430310843718665, "grad_norm": 1.1176432371139526, "learning_rate": 0.00019069766065049067, "loss": 1.5729, "step": 1147 }, { "epoch": 0.16444635439048846, "grad_norm": 1.1923054456710815, "learning_rate": 0.00019067810903171384, "loss": 1.5145, "step": 1148 }, { "epoch": 0.16458960034379028, "grad_norm": 1.384315848350525, "learning_rate": 0.0001906585378921841, "loss": 1.5497, "step": 1149 }, { "epoch": 0.16473284629709212, "grad_norm": 1.5045276880264282, "learning_rate": 0.0001906389472361146, "loss": 1.5576, "step": 1150 }, { "epoch": 0.16487609225039393, "grad_norm": 1.384010910987854, "learning_rate": 0.00019061933706772274, "loss": 1.5436, "step": 1151 }, { "epoch": 0.16501933820369574, "grad_norm": 1.8074532747268677, "learning_rate": 0.0001905997073912301, "loss": 1.4112, "step": 1152 }, { "epoch": 0.16516258415699756, "grad_norm": 1.0780630111694336, "learning_rate": 0.00019058005821086244, "loss": 1.4486, "step": 1153 }, { "epoch": 0.1653058301102994, "grad_norm": 1.2198182344436646, "learning_rate": 0.00019056038953084973, "loss": 1.6487, "step": 1154 }, { "epoch": 0.1654490760636012, "grad_norm": 1.2643805742263794, "learning_rate": 0.00019054070135542618, "loss": 1.5346, "step": 1155 }, { "epoch": 0.16559232201690302, "grad_norm": 1.2533457279205322, "learning_rate": 0.00019052099368883018, "loss": 1.5605, "step": 1156 }, { "epoch": 0.16573556797020483, "grad_norm": 1.2444400787353516, "learning_rate": 0.00019050126653530426, "loss": 1.4794, "step": 1157 }, { "epoch": 0.16587881392350667, "grad_norm": 1.3984493017196655, "learning_rate": 0.00019048151989909523, "loss": 1.5666, "step": 1158 }, { "epoch": 0.1660220598768085, "grad_norm": 1.9702389240264893, "learning_rate": 0.00019046175378445405, "loss": 1.4601, "step": 1159 }, { "epoch": 0.1661653058301103, "grad_norm": 1.6798146963119507, "learning_rate": 0.00019044196819563588, "loss": 1.6333, "step": 1160 }, { "epoch": 0.1663085517834121, "grad_norm": 1.4010471105575562, "learning_rate": 0.0001904221631369, "loss": 1.4371, "step": 1161 }, { "epoch": 0.16645179773671392, "grad_norm": 1.5270562171936035, "learning_rate": 0.00019040233861251002, "loss": 1.5629, "step": 1162 }, { "epoch": 0.16659504369001576, "grad_norm": 1.4402588605880737, "learning_rate": 0.00019038249462673365, "loss": 1.4874, "step": 1163 }, { "epoch": 0.16673828964331758, "grad_norm": 1.0173940658569336, "learning_rate": 0.0001903626311838428, "loss": 1.4896, "step": 1164 }, { "epoch": 0.1668815355966194, "grad_norm": 1.4004608392715454, "learning_rate": 0.00019034274828811358, "loss": 1.5575, "step": 1165 }, { "epoch": 0.1670247815499212, "grad_norm": 1.4390407800674438, "learning_rate": 0.0001903228459438263, "loss": 1.3593, "step": 1166 }, { "epoch": 0.16716802750322304, "grad_norm": 1.4150819778442383, "learning_rate": 0.00019030292415526544, "loss": 1.5085, "step": 1167 }, { "epoch": 0.16731127345652486, "grad_norm": 1.3124033212661743, "learning_rate": 0.00019028298292671965, "loss": 1.3581, "step": 1168 }, { "epoch": 0.16745451940982667, "grad_norm": 1.2406296730041504, "learning_rate": 0.0001902630222624818, "loss": 1.4113, "step": 1169 }, { "epoch": 0.16759776536312848, "grad_norm": 1.18865168094635, "learning_rate": 0.00019024304216684888, "loss": 1.7488, "step": 1170 }, { "epoch": 0.16774101131643032, "grad_norm": 1.2740721702575684, "learning_rate": 0.00019022304264412217, "loss": 1.514, "step": 1171 }, { "epoch": 0.16788425726973213, "grad_norm": 1.198022723197937, "learning_rate": 0.00019020302369860708, "loss": 1.5708, "step": 1172 }, { "epoch": 0.16802750322303395, "grad_norm": 1.2177618741989136, "learning_rate": 0.00019018298533461314, "loss": 1.5316, "step": 1173 }, { "epoch": 0.16817074917633576, "grad_norm": 1.2639113664627075, "learning_rate": 0.00019016292755645418, "loss": 1.6626, "step": 1174 }, { "epoch": 0.1683139951296376, "grad_norm": 1.3574819564819336, "learning_rate": 0.00019014285036844804, "loss": 1.5747, "step": 1175 }, { "epoch": 0.1684572410829394, "grad_norm": 1.2896956205368042, "learning_rate": 0.00019012275377491695, "loss": 1.5548, "step": 1176 }, { "epoch": 0.16860048703624123, "grad_norm": 1.3717323541641235, "learning_rate": 0.00019010263778018716, "loss": 1.6679, "step": 1177 }, { "epoch": 0.16874373298954304, "grad_norm": 1.316711187362671, "learning_rate": 0.00019008250238858914, "loss": 1.5343, "step": 1178 }, { "epoch": 0.16888697894284485, "grad_norm": 1.3912372589111328, "learning_rate": 0.00019006234760445754, "loss": 1.5242, "step": 1179 }, { "epoch": 0.1690302248961467, "grad_norm": 1.1723421812057495, "learning_rate": 0.0001900421734321312, "loss": 1.586, "step": 1180 }, { "epoch": 0.1691734708494485, "grad_norm": 1.4555163383483887, "learning_rate": 0.00019002197987595313, "loss": 1.6012, "step": 1181 }, { "epoch": 0.16931671680275032, "grad_norm": 1.5780225992202759, "learning_rate": 0.00019000176694027049, "loss": 1.4767, "step": 1182 }, { "epoch": 0.16945996275605213, "grad_norm": 1.1260582208633423, "learning_rate": 0.00018998153462943462, "loss": 1.502, "step": 1183 }, { "epoch": 0.16960320870935397, "grad_norm": 1.557626485824585, "learning_rate": 0.00018996128294780106, "loss": 1.5532, "step": 1184 }, { "epoch": 0.16974645466265578, "grad_norm": 1.4284131526947021, "learning_rate": 0.00018994101189972944, "loss": 1.5989, "step": 1185 }, { "epoch": 0.1698897006159576, "grad_norm": 1.2266610860824585, "learning_rate": 0.00018992072148958368, "loss": 1.6673, "step": 1186 }, { "epoch": 0.1700329465692594, "grad_norm": 1.2829517126083374, "learning_rate": 0.00018990041172173178, "loss": 1.5749, "step": 1187 }, { "epoch": 0.17017619252256125, "grad_norm": 1.4330170154571533, "learning_rate": 0.00018988008260054591, "loss": 1.6927, "step": 1188 }, { "epoch": 0.17031943847586306, "grad_norm": 1.4083924293518066, "learning_rate": 0.00018985973413040245, "loss": 1.5968, "step": 1189 }, { "epoch": 0.17046268442916487, "grad_norm": 1.2225673198699951, "learning_rate": 0.00018983936631568194, "loss": 1.7187, "step": 1190 }, { "epoch": 0.1706059303824667, "grad_norm": 1.3002638816833496, "learning_rate": 0.000189818979160769, "loss": 1.6167, "step": 1191 }, { "epoch": 0.17074917633576853, "grad_norm": 1.4452366828918457, "learning_rate": 0.00018979857267005255, "loss": 1.5847, "step": 1192 }, { "epoch": 0.17089242228907034, "grad_norm": 1.315406322479248, "learning_rate": 0.00018977814684792557, "loss": 1.5015, "step": 1193 }, { "epoch": 0.17103566824237215, "grad_norm": 1.3214411735534668, "learning_rate": 0.0001897577016987852, "loss": 1.5417, "step": 1194 }, { "epoch": 0.17117891419567396, "grad_norm": 1.129775881767273, "learning_rate": 0.00018973723722703285, "loss": 1.3749, "step": 1195 }, { "epoch": 0.1713221601489758, "grad_norm": 1.1681206226348877, "learning_rate": 0.00018971675343707397, "loss": 1.5046, "step": 1196 }, { "epoch": 0.17146540610227762, "grad_norm": 1.353795051574707, "learning_rate": 0.00018969625033331822, "loss": 1.6288, "step": 1197 }, { "epoch": 0.17160865205557943, "grad_norm": 1.6154727935791016, "learning_rate": 0.0001896757279201794, "loss": 1.6408, "step": 1198 }, { "epoch": 0.17175189800888124, "grad_norm": 1.6788369417190552, "learning_rate": 0.00018965518620207549, "loss": 1.5689, "step": 1199 }, { "epoch": 0.17189514396218306, "grad_norm": 1.5394922494888306, "learning_rate": 0.00018963462518342862, "loss": 1.4842, "step": 1200 }, { "epoch": 0.1720383899154849, "grad_norm": 1.4983444213867188, "learning_rate": 0.00018961404486866508, "loss": 1.4745, "step": 1201 }, { "epoch": 0.1721816358687867, "grad_norm": 1.1654797792434692, "learning_rate": 0.00018959344526221525, "loss": 1.4511, "step": 1202 }, { "epoch": 0.17232488182208852, "grad_norm": 1.3940114974975586, "learning_rate": 0.00018957282636851376, "loss": 1.5774, "step": 1203 }, { "epoch": 0.17246812777539033, "grad_norm": 1.5903339385986328, "learning_rate": 0.00018955218819199937, "loss": 1.636, "step": 1204 }, { "epoch": 0.17261137372869217, "grad_norm": 1.554872989654541, "learning_rate": 0.00018953153073711487, "loss": 1.5397, "step": 1205 }, { "epoch": 0.172754619681994, "grad_norm": 1.257312536239624, "learning_rate": 0.0001895108540083074, "loss": 1.6073, "step": 1206 }, { "epoch": 0.1728978656352958, "grad_norm": 1.4855549335479736, "learning_rate": 0.0001894901580100281, "loss": 1.5972, "step": 1207 }, { "epoch": 0.1730411115885976, "grad_norm": 1.3067160844802856, "learning_rate": 0.00018946944274673234, "loss": 1.5547, "step": 1208 }, { "epoch": 0.17318435754189945, "grad_norm": 1.513991117477417, "learning_rate": 0.00018944870822287956, "loss": 1.5859, "step": 1209 }, { "epoch": 0.17332760349520127, "grad_norm": 1.20363187789917, "learning_rate": 0.00018942795444293342, "loss": 1.5133, "step": 1210 }, { "epoch": 0.17347084944850308, "grad_norm": 1.4597328901290894, "learning_rate": 0.00018940718141136168, "loss": 1.5437, "step": 1211 }, { "epoch": 0.1736140954018049, "grad_norm": 1.5445020198822021, "learning_rate": 0.0001893863891326363, "loss": 1.4522, "step": 1212 }, { "epoch": 0.17375734135510673, "grad_norm": 1.2678565979003906, "learning_rate": 0.00018936557761123327, "loss": 1.5093, "step": 1213 }, { "epoch": 0.17390058730840854, "grad_norm": 1.2605350017547607, "learning_rate": 0.00018934474685163285, "loss": 1.64, "step": 1214 }, { "epoch": 0.17404383326171036, "grad_norm": 1.2464580535888672, "learning_rate": 0.00018932389685831936, "loss": 1.5262, "step": 1215 }, { "epoch": 0.17418707921501217, "grad_norm": 1.2617474794387817, "learning_rate": 0.00018930302763578132, "loss": 1.6288, "step": 1216 }, { "epoch": 0.17433032516831398, "grad_norm": 1.3272151947021484, "learning_rate": 0.0001892821391885113, "loss": 1.5683, "step": 1217 }, { "epoch": 0.17447357112161582, "grad_norm": 1.3451011180877686, "learning_rate": 0.00018926123152100615, "loss": 1.4513, "step": 1218 }, { "epoch": 0.17461681707491764, "grad_norm": 1.1489102840423584, "learning_rate": 0.0001892403046377667, "loss": 1.6464, "step": 1219 }, { "epoch": 0.17476006302821945, "grad_norm": 1.304760217666626, "learning_rate": 0.00018921935854329802, "loss": 1.3769, "step": 1220 }, { "epoch": 0.17490330898152126, "grad_norm": 1.2934755086898804, "learning_rate": 0.00018919839324210927, "loss": 1.5776, "step": 1221 }, { "epoch": 0.1750465549348231, "grad_norm": 1.4807761907577515, "learning_rate": 0.00018917740873871378, "loss": 1.5134, "step": 1222 }, { "epoch": 0.1751898008881249, "grad_norm": 1.3128358125686646, "learning_rate": 0.000189156405037629, "loss": 1.4788, "step": 1223 }, { "epoch": 0.17533304684142673, "grad_norm": 1.6213737726211548, "learning_rate": 0.0001891353821433765, "loss": 1.5816, "step": 1224 }, { "epoch": 0.17547629279472854, "grad_norm": 1.4727811813354492, "learning_rate": 0.00018911434006048196, "loss": 1.4297, "step": 1225 }, { "epoch": 0.17561953874803038, "grad_norm": 1.4761892557144165, "learning_rate": 0.00018909327879347524, "loss": 1.4269, "step": 1226 }, { "epoch": 0.1757627847013322, "grad_norm": 1.0209108591079712, "learning_rate": 0.00018907219834689033, "loss": 1.5705, "step": 1227 }, { "epoch": 0.175906030654634, "grad_norm": 1.575480341911316, "learning_rate": 0.00018905109872526532, "loss": 1.5731, "step": 1228 }, { "epoch": 0.17604927660793582, "grad_norm": 1.4144480228424072, "learning_rate": 0.0001890299799331424, "loss": 1.4263, "step": 1229 }, { "epoch": 0.17619252256123766, "grad_norm": 1.124971866607666, "learning_rate": 0.00018900884197506796, "loss": 1.5052, "step": 1230 }, { "epoch": 0.17633576851453947, "grad_norm": 1.4364194869995117, "learning_rate": 0.00018898768485559248, "loss": 1.5004, "step": 1231 }, { "epoch": 0.17647901446784128, "grad_norm": 1.3133175373077393, "learning_rate": 0.00018896650857927054, "loss": 1.5857, "step": 1232 }, { "epoch": 0.1766222604211431, "grad_norm": 1.299665093421936, "learning_rate": 0.00018894531315066088, "loss": 1.5527, "step": 1233 }, { "epoch": 0.1767655063744449, "grad_norm": 1.4516721963882446, "learning_rate": 0.00018892409857432636, "loss": 1.272, "step": 1234 }, { "epoch": 0.17690875232774675, "grad_norm": 1.3323822021484375, "learning_rate": 0.00018890286485483395, "loss": 1.5578, "step": 1235 }, { "epoch": 0.17705199828104856, "grad_norm": 1.1122660636901855, "learning_rate": 0.00018888161199675474, "loss": 1.6342, "step": 1236 }, { "epoch": 0.17719524423435037, "grad_norm": 1.2867708206176758, "learning_rate": 0.00018886034000466391, "loss": 1.456, "step": 1237 }, { "epoch": 0.1773384901876522, "grad_norm": 1.4764132499694824, "learning_rate": 0.0001888390488831409, "loss": 1.3443, "step": 1238 }, { "epoch": 0.17748173614095403, "grad_norm": 1.1385782957077026, "learning_rate": 0.00018881773863676905, "loss": 1.7634, "step": 1239 }, { "epoch": 0.17762498209425584, "grad_norm": 1.081588625907898, "learning_rate": 0.00018879640927013598, "loss": 1.474, "step": 1240 }, { "epoch": 0.17776822804755765, "grad_norm": 1.0898150205612183, "learning_rate": 0.00018877506078783337, "loss": 1.5884, "step": 1241 }, { "epoch": 0.17791147400085947, "grad_norm": 1.514620065689087, "learning_rate": 0.000188753693194457, "loss": 1.388, "step": 1242 }, { "epoch": 0.1780547199541613, "grad_norm": 1.4251086711883545, "learning_rate": 0.0001887323064946068, "loss": 1.6073, "step": 1243 }, { "epoch": 0.17819796590746312, "grad_norm": 1.2940031290054321, "learning_rate": 0.00018871090069288678, "loss": 1.7119, "step": 1244 }, { "epoch": 0.17834121186076493, "grad_norm": 1.2243354320526123, "learning_rate": 0.0001886894757939051, "loss": 1.4685, "step": 1245 }, { "epoch": 0.17848445781406674, "grad_norm": 1.044276237487793, "learning_rate": 0.00018866803180227402, "loss": 1.6356, "step": 1246 }, { "epoch": 0.17862770376736858, "grad_norm": 1.1098270416259766, "learning_rate": 0.00018864656872260985, "loss": 1.6102, "step": 1247 }, { "epoch": 0.1787709497206704, "grad_norm": 1.2511557340621948, "learning_rate": 0.00018862508655953316, "loss": 1.4131, "step": 1248 }, { "epoch": 0.1789141956739722, "grad_norm": 1.3002887964248657, "learning_rate": 0.0001886035853176684, "loss": 1.8038, "step": 1249 }, { "epoch": 0.17905744162727402, "grad_norm": 1.2917287349700928, "learning_rate": 0.00018858206500164436, "loss": 1.4838, "step": 1250 }, { "epoch": 0.17920068758057586, "grad_norm": 1.290832757949829, "learning_rate": 0.00018856052561609378, "loss": 1.6101, "step": 1251 }, { "epoch": 0.17934393353387768, "grad_norm": 0.9336103200912476, "learning_rate": 0.00018853896716565358, "loss": 1.6037, "step": 1252 }, { "epoch": 0.1794871794871795, "grad_norm": 1.3125022649765015, "learning_rate": 0.00018851738965496476, "loss": 1.4673, "step": 1253 }, { "epoch": 0.1796304254404813, "grad_norm": 1.1871716976165771, "learning_rate": 0.00018849579308867238, "loss": 1.5099, "step": 1254 }, { "epoch": 0.1797736713937831, "grad_norm": 1.5702705383300781, "learning_rate": 0.00018847417747142568, "loss": 1.5679, "step": 1255 }, { "epoch": 0.17991691734708495, "grad_norm": 1.1900712251663208, "learning_rate": 0.00018845254280787797, "loss": 1.4986, "step": 1256 }, { "epoch": 0.18006016330038677, "grad_norm": 1.5937440395355225, "learning_rate": 0.00018843088910268664, "loss": 1.6138, "step": 1257 }, { "epoch": 0.18020340925368858, "grad_norm": 1.2704687118530273, "learning_rate": 0.00018840921636051325, "loss": 1.5548, "step": 1258 }, { "epoch": 0.1803466552069904, "grad_norm": 1.2647334337234497, "learning_rate": 0.00018838752458602334, "loss": 1.521, "step": 1259 }, { "epoch": 0.18048990116029223, "grad_norm": 1.4259322881698608, "learning_rate": 0.00018836581378388665, "loss": 1.5202, "step": 1260 }, { "epoch": 0.18063314711359404, "grad_norm": 1.2759722471237183, "learning_rate": 0.00018834408395877693, "loss": 1.5498, "step": 1261 }, { "epoch": 0.18077639306689586, "grad_norm": 1.0806058645248413, "learning_rate": 0.00018832233511537216, "loss": 1.4809, "step": 1262 }, { "epoch": 0.18091963902019767, "grad_norm": 1.004581093788147, "learning_rate": 0.00018830056725835424, "loss": 1.6035, "step": 1263 }, { "epoch": 0.1810628849734995, "grad_norm": 1.1678098440170288, "learning_rate": 0.00018827878039240933, "loss": 1.4315, "step": 1264 }, { "epoch": 0.18120613092680132, "grad_norm": 0.9221726655960083, "learning_rate": 0.00018825697452222754, "loss": 1.5908, "step": 1265 }, { "epoch": 0.18134937688010314, "grad_norm": 1.002099871635437, "learning_rate": 0.00018823514965250317, "loss": 1.5367, "step": 1266 }, { "epoch": 0.18149262283340495, "grad_norm": 1.2065305709838867, "learning_rate": 0.00018821330578793453, "loss": 1.3529, "step": 1267 }, { "epoch": 0.1816358687867068, "grad_norm": 1.3609874248504639, "learning_rate": 0.0001881914429332241, "loss": 1.5409, "step": 1268 }, { "epoch": 0.1817791147400086, "grad_norm": 1.1752578020095825, "learning_rate": 0.00018816956109307843, "loss": 1.6717, "step": 1269 }, { "epoch": 0.18192236069331041, "grad_norm": 1.355809211730957, "learning_rate": 0.0001881476602722081, "loss": 1.4575, "step": 1270 }, { "epoch": 0.18206560664661223, "grad_norm": 1.412541151046753, "learning_rate": 0.00018812574047532782, "loss": 1.4629, "step": 1271 }, { "epoch": 0.18220885259991404, "grad_norm": 1.0576441287994385, "learning_rate": 0.00018810380170715643, "loss": 1.4376, "step": 1272 }, { "epoch": 0.18235209855321588, "grad_norm": 1.1732678413391113, "learning_rate": 0.00018808184397241674, "loss": 1.5155, "step": 1273 }, { "epoch": 0.1824953445065177, "grad_norm": 1.0407720804214478, "learning_rate": 0.0001880598672758357, "loss": 1.6492, "step": 1274 }, { "epoch": 0.1826385904598195, "grad_norm": 0.9912101626396179, "learning_rate": 0.00018803787162214442, "loss": 1.3754, "step": 1275 }, { "epoch": 0.18278183641312132, "grad_norm": 1.3536498546600342, "learning_rate": 0.00018801585701607793, "loss": 1.535, "step": 1276 }, { "epoch": 0.18292508236642316, "grad_norm": 1.4388937950134277, "learning_rate": 0.00018799382346237553, "loss": 1.6789, "step": 1277 }, { "epoch": 0.18306832831972497, "grad_norm": 1.400286078453064, "learning_rate": 0.0001879717709657804, "loss": 1.6491, "step": 1278 }, { "epoch": 0.18321157427302678, "grad_norm": 1.3085813522338867, "learning_rate": 0.00018794969953104, "loss": 1.4905, "step": 1279 }, { "epoch": 0.1833548202263286, "grad_norm": 0.994949996471405, "learning_rate": 0.00018792760916290567, "loss": 1.4742, "step": 1280 }, { "epoch": 0.18349806617963044, "grad_norm": 1.4386508464813232, "learning_rate": 0.00018790549986613298, "loss": 1.6211, "step": 1281 }, { "epoch": 0.18364131213293225, "grad_norm": 1.4195054769515991, "learning_rate": 0.0001878833716454815, "loss": 1.5751, "step": 1282 }, { "epoch": 0.18378455808623406, "grad_norm": 1.2426488399505615, "learning_rate": 0.00018786122450571485, "loss": 1.5271, "step": 1283 }, { "epoch": 0.18392780403953587, "grad_norm": 1.2151144742965698, "learning_rate": 0.0001878390584516008, "loss": 1.5028, "step": 1284 }, { "epoch": 0.18407104999283772, "grad_norm": 1.4324997663497925, "learning_rate": 0.00018781687348791114, "loss": 1.5807, "step": 1285 }, { "epoch": 0.18421429594613953, "grad_norm": 1.0086147785186768, "learning_rate": 0.00018779466961942176, "loss": 1.4979, "step": 1286 }, { "epoch": 0.18435754189944134, "grad_norm": 1.231493592262268, "learning_rate": 0.00018777244685091259, "loss": 1.3738, "step": 1287 }, { "epoch": 0.18450078785274315, "grad_norm": 1.3142328262329102, "learning_rate": 0.00018775020518716761, "loss": 1.3915, "step": 1288 }, { "epoch": 0.18464403380604497, "grad_norm": 1.110546588897705, "learning_rate": 0.00018772794463297498, "loss": 1.4936, "step": 1289 }, { "epoch": 0.1847872797593468, "grad_norm": 1.082787036895752, "learning_rate": 0.00018770566519312677, "loss": 1.5962, "step": 1290 }, { "epoch": 0.18493052571264862, "grad_norm": 1.1838253736495972, "learning_rate": 0.00018768336687241926, "loss": 1.4919, "step": 1291 }, { "epoch": 0.18507377166595043, "grad_norm": 1.272300362586975, "learning_rate": 0.00018766104967565266, "loss": 1.5732, "step": 1292 }, { "epoch": 0.18521701761925224, "grad_norm": 1.4428644180297852, "learning_rate": 0.00018763871360763136, "loss": 1.5202, "step": 1293 }, { "epoch": 0.18536026357255408, "grad_norm": 1.3102538585662842, "learning_rate": 0.00018761635867316372, "loss": 1.5307, "step": 1294 }, { "epoch": 0.1855035095258559, "grad_norm": 1.2022842168807983, "learning_rate": 0.0001875939848770622, "loss": 1.5179, "step": 1295 }, { "epoch": 0.1856467554791577, "grad_norm": 1.2640762329101562, "learning_rate": 0.0001875715922241434, "loss": 1.581, "step": 1296 }, { "epoch": 0.18579000143245952, "grad_norm": 1.1838408708572388, "learning_rate": 0.00018754918071922782, "loss": 1.4242, "step": 1297 }, { "epoch": 0.18593324738576136, "grad_norm": 1.2696324586868286, "learning_rate": 0.00018752675036714015, "loss": 1.5717, "step": 1298 }, { "epoch": 0.18607649333906318, "grad_norm": 1.3920341730117798, "learning_rate": 0.00018750430117270913, "loss": 1.4246, "step": 1299 }, { "epoch": 0.186219739292365, "grad_norm": 1.1627073287963867, "learning_rate": 0.00018748183314076737, "loss": 1.5879, "step": 1300 }, { "epoch": 0.1863629852456668, "grad_norm": 1.2461504936218262, "learning_rate": 0.00018745934627615186, "loss": 1.6001, "step": 1301 }, { "epoch": 0.18650623119896864, "grad_norm": 1.5189625024795532, "learning_rate": 0.0001874368405837033, "loss": 1.458, "step": 1302 }, { "epoch": 0.18664947715227045, "grad_norm": 1.3910939693450928, "learning_rate": 0.00018741431606826672, "loss": 1.437, "step": 1303 }, { "epoch": 0.18679272310557227, "grad_norm": 1.3026515245437622, "learning_rate": 0.0001873917727346911, "loss": 1.6072, "step": 1304 }, { "epoch": 0.18693596905887408, "grad_norm": 1.2518068552017212, "learning_rate": 0.00018736921058782935, "loss": 1.4938, "step": 1305 }, { "epoch": 0.1870792150121759, "grad_norm": 1.391098976135254, "learning_rate": 0.00018734662963253867, "loss": 1.4608, "step": 1306 }, { "epoch": 0.18722246096547773, "grad_norm": 1.2206428050994873, "learning_rate": 0.0001873240298736801, "loss": 1.3664, "step": 1307 }, { "epoch": 0.18736570691877955, "grad_norm": 1.2746695280075073, "learning_rate": 0.00018730141131611882, "loss": 1.5006, "step": 1308 }, { "epoch": 0.18750895287208136, "grad_norm": 1.448061227798462, "learning_rate": 0.00018727877396472408, "loss": 1.4169, "step": 1309 }, { "epoch": 0.18765219882538317, "grad_norm": 1.2230345010757446, "learning_rate": 0.00018725611782436911, "loss": 1.4712, "step": 1310 }, { "epoch": 0.187795444778685, "grad_norm": 1.2678667306900024, "learning_rate": 0.00018723344289993122, "loss": 1.3659, "step": 1311 }, { "epoch": 0.18793869073198682, "grad_norm": 1.203021764755249, "learning_rate": 0.00018721074919629177, "loss": 1.5056, "step": 1312 }, { "epoch": 0.18808193668528864, "grad_norm": 1.3539254665374756, "learning_rate": 0.00018718803671833616, "loss": 1.4424, "step": 1313 }, { "epoch": 0.18822518263859045, "grad_norm": 1.0582293272018433, "learning_rate": 0.0001871653054709538, "loss": 1.5905, "step": 1314 }, { "epoch": 0.1883684285918923, "grad_norm": 1.0168813467025757, "learning_rate": 0.0001871425554590382, "loss": 1.6622, "step": 1315 }, { "epoch": 0.1885116745451941, "grad_norm": 1.4426645040512085, "learning_rate": 0.00018711978668748685, "loss": 1.4436, "step": 1316 }, { "epoch": 0.18865492049849591, "grad_norm": 1.4375327825546265, "learning_rate": 0.00018709699916120127, "loss": 1.4814, "step": 1317 }, { "epoch": 0.18879816645179773, "grad_norm": 1.2645779848098755, "learning_rate": 0.00018707419288508713, "loss": 1.5021, "step": 1318 }, { "epoch": 0.18894141240509957, "grad_norm": 1.1306257247924805, "learning_rate": 0.00018705136786405398, "loss": 1.4533, "step": 1319 }, { "epoch": 0.18908465835840138, "grad_norm": 1.6387449502944946, "learning_rate": 0.00018702852410301554, "loss": 1.3, "step": 1320 }, { "epoch": 0.1892279043117032, "grad_norm": 1.2490342855453491, "learning_rate": 0.00018700566160688946, "loss": 1.6539, "step": 1321 }, { "epoch": 0.189371150265005, "grad_norm": 1.1145542860031128, "learning_rate": 0.00018698278038059752, "loss": 1.6368, "step": 1322 }, { "epoch": 0.18951439621830685, "grad_norm": 1.474271297454834, "learning_rate": 0.00018695988042906542, "loss": 1.3596, "step": 1323 }, { "epoch": 0.18965764217160866, "grad_norm": 1.216958999633789, "learning_rate": 0.00018693696175722303, "loss": 1.4823, "step": 1324 }, { "epoch": 0.18980088812491047, "grad_norm": 1.1959834098815918, "learning_rate": 0.00018691402437000408, "loss": 1.5421, "step": 1325 }, { "epoch": 0.18994413407821228, "grad_norm": 1.1801759004592896, "learning_rate": 0.0001868910682723465, "loss": 1.4484, "step": 1326 }, { "epoch": 0.1900873800315141, "grad_norm": 1.584790825843811, "learning_rate": 0.00018686809346919213, "loss": 1.5239, "step": 1327 }, { "epoch": 0.19023062598481594, "grad_norm": 1.0420902967453003, "learning_rate": 0.0001868450999654869, "loss": 1.5239, "step": 1328 }, { "epoch": 0.19037387193811775, "grad_norm": 1.1460515260696411, "learning_rate": 0.00018682208776618072, "loss": 1.6759, "step": 1329 }, { "epoch": 0.19051711789141956, "grad_norm": 1.3263332843780518, "learning_rate": 0.00018679905687622758, "loss": 1.4208, "step": 1330 }, { "epoch": 0.19066036384472138, "grad_norm": 1.160867691040039, "learning_rate": 0.0001867760073005854, "loss": 1.4879, "step": 1331 }, { "epoch": 0.19080360979802322, "grad_norm": 1.2872892618179321, "learning_rate": 0.00018675293904421624, "loss": 1.6284, "step": 1332 }, { "epoch": 0.19094685575132503, "grad_norm": 1.4801182746887207, "learning_rate": 0.0001867298521120861, "loss": 1.3618, "step": 1333 }, { "epoch": 0.19109010170462684, "grad_norm": 1.6616297960281372, "learning_rate": 0.00018670674650916506, "loss": 1.5601, "step": 1334 }, { "epoch": 0.19123334765792865, "grad_norm": 1.1809343099594116, "learning_rate": 0.0001866836222404271, "loss": 1.6917, "step": 1335 }, { "epoch": 0.1913765936112305, "grad_norm": 1.2287925481796265, "learning_rate": 0.0001866604793108504, "loss": 1.6374, "step": 1336 }, { "epoch": 0.1915198395645323, "grad_norm": 1.015394687652588, "learning_rate": 0.00018663731772541702, "loss": 1.4899, "step": 1337 }, { "epoch": 0.19166308551783412, "grad_norm": 1.2321321964263916, "learning_rate": 0.00018661413748911304, "loss": 1.5549, "step": 1338 }, { "epoch": 0.19180633147113593, "grad_norm": 1.0912636518478394, "learning_rate": 0.00018659093860692866, "loss": 1.301, "step": 1339 }, { "epoch": 0.19194957742443777, "grad_norm": 1.2184662818908691, "learning_rate": 0.000186567721083858, "loss": 1.4529, "step": 1340 }, { "epoch": 0.19209282337773959, "grad_norm": 1.1306071281433105, "learning_rate": 0.00018654448492489917, "loss": 1.5536, "step": 1341 }, { "epoch": 0.1922360693310414, "grad_norm": 1.3233520984649658, "learning_rate": 0.0001865212301350544, "loss": 1.5416, "step": 1342 }, { "epoch": 0.1923793152843432, "grad_norm": 1.313374638557434, "learning_rate": 0.00018649795671932986, "loss": 1.4261, "step": 1343 }, { "epoch": 0.19252256123764502, "grad_norm": 1.1718517541885376, "learning_rate": 0.0001864746646827357, "loss": 1.5037, "step": 1344 }, { "epoch": 0.19266580719094686, "grad_norm": 1.3711566925048828, "learning_rate": 0.00018645135403028617, "loss": 1.4132, "step": 1345 }, { "epoch": 0.19280905314424868, "grad_norm": 1.4926767349243164, "learning_rate": 0.00018642802476699944, "loss": 1.5733, "step": 1346 }, { "epoch": 0.1929522990975505, "grad_norm": 1.3704196214675903, "learning_rate": 0.00018640467689789775, "loss": 1.403, "step": 1347 }, { "epoch": 0.1930955450508523, "grad_norm": 1.1321619749069214, "learning_rate": 0.00018638131042800733, "loss": 1.7169, "step": 1348 }, { "epoch": 0.19323879100415414, "grad_norm": 1.237526297569275, "learning_rate": 0.00018635792536235836, "loss": 1.5314, "step": 1349 }, { "epoch": 0.19338203695745596, "grad_norm": 1.1408690214157104, "learning_rate": 0.00018633452170598508, "loss": 1.6169, "step": 1350 }, { "epoch": 0.19352528291075777, "grad_norm": 1.1132980585098267, "learning_rate": 0.00018631109946392574, "loss": 1.3108, "step": 1351 }, { "epoch": 0.19366852886405958, "grad_norm": 1.0329493284225464, "learning_rate": 0.00018628765864122255, "loss": 1.384, "step": 1352 }, { "epoch": 0.19381177481736142, "grad_norm": 1.2056758403778076, "learning_rate": 0.00018626419924292173, "loss": 1.4361, "step": 1353 }, { "epoch": 0.19395502077066323, "grad_norm": 1.3188925981521606, "learning_rate": 0.00018624072127407351, "loss": 1.5968, "step": 1354 }, { "epoch": 0.19409826672396505, "grad_norm": 1.3645230531692505, "learning_rate": 0.00018621722473973216, "loss": 1.5848, "step": 1355 }, { "epoch": 0.19424151267726686, "grad_norm": 1.1375141143798828, "learning_rate": 0.00018619370964495586, "loss": 1.5364, "step": 1356 }, { "epoch": 0.1943847586305687, "grad_norm": 1.3814671039581299, "learning_rate": 0.00018617017599480682, "loss": 1.4141, "step": 1357 }, { "epoch": 0.1945280045838705, "grad_norm": 1.353074550628662, "learning_rate": 0.00018614662379435129, "loss": 1.4439, "step": 1358 }, { "epoch": 0.19467125053717232, "grad_norm": 1.1491092443466187, "learning_rate": 0.0001861230530486594, "loss": 1.6599, "step": 1359 }, { "epoch": 0.19481449649047414, "grad_norm": 1.1675071716308594, "learning_rate": 0.00018609946376280548, "loss": 1.4965, "step": 1360 }, { "epoch": 0.19495774244377595, "grad_norm": 1.0877861976623535, "learning_rate": 0.0001860758559418676, "loss": 1.4455, "step": 1361 }, { "epoch": 0.1951009883970778, "grad_norm": 1.1705178022384644, "learning_rate": 0.000186052229590928, "loss": 1.4463, "step": 1362 }, { "epoch": 0.1952442343503796, "grad_norm": 1.2562415599822998, "learning_rate": 0.00018602858471507283, "loss": 1.5457, "step": 1363 }, { "epoch": 0.19538748030368142, "grad_norm": 1.2922163009643555, "learning_rate": 0.00018600492131939225, "loss": 1.4898, "step": 1364 }, { "epoch": 0.19553072625698323, "grad_norm": 1.102820873260498, "learning_rate": 0.00018598123940898037, "loss": 1.4937, "step": 1365 }, { "epoch": 0.19567397221028507, "grad_norm": 0.9924041628837585, "learning_rate": 0.00018595753898893537, "loss": 1.6517, "step": 1366 }, { "epoch": 0.19581721816358688, "grad_norm": 1.1563290357589722, "learning_rate": 0.00018593382006435935, "loss": 1.4809, "step": 1367 }, { "epoch": 0.1959604641168887, "grad_norm": 1.358456015586853, "learning_rate": 0.0001859100826403584, "loss": 1.3629, "step": 1368 }, { "epoch": 0.1961037100701905, "grad_norm": 1.2798407077789307, "learning_rate": 0.00018588632672204264, "loss": 1.2177, "step": 1369 }, { "epoch": 0.19624695602349235, "grad_norm": 2.063000440597534, "learning_rate": 0.00018586255231452605, "loss": 1.5109, "step": 1370 }, { "epoch": 0.19639020197679416, "grad_norm": 1.2859746217727661, "learning_rate": 0.0001858387594229267, "loss": 1.3825, "step": 1371 }, { "epoch": 0.19653344793009597, "grad_norm": 1.2065383195877075, "learning_rate": 0.00018581494805236667, "loss": 1.6827, "step": 1372 }, { "epoch": 0.19667669388339779, "grad_norm": 1.323138952255249, "learning_rate": 0.00018579111820797185, "loss": 1.3593, "step": 1373 }, { "epoch": 0.19681993983669963, "grad_norm": 1.2470442056655884, "learning_rate": 0.00018576726989487233, "loss": 1.4548, "step": 1374 }, { "epoch": 0.19696318579000144, "grad_norm": 1.1567609310150146, "learning_rate": 0.00018574340311820203, "loss": 1.5156, "step": 1375 }, { "epoch": 0.19710643174330325, "grad_norm": 1.2888273000717163, "learning_rate": 0.00018571951788309883, "loss": 1.5113, "step": 1376 }, { "epoch": 0.19724967769660506, "grad_norm": 1.0796053409576416, "learning_rate": 0.00018569561419470466, "loss": 1.4605, "step": 1377 }, { "epoch": 0.1973929236499069, "grad_norm": 0.9260571002960205, "learning_rate": 0.00018567169205816538, "loss": 1.7155, "step": 1378 }, { "epoch": 0.19753616960320872, "grad_norm": 1.3426995277404785, "learning_rate": 0.00018564775147863086, "loss": 1.435, "step": 1379 }, { "epoch": 0.19767941555651053, "grad_norm": 1.362938642501831, "learning_rate": 0.0001856237924612549, "loss": 1.3506, "step": 1380 }, { "epoch": 0.19782266150981234, "grad_norm": 1.527593731880188, "learning_rate": 0.00018559981501119525, "loss": 1.4524, "step": 1381 }, { "epoch": 0.19796590746311415, "grad_norm": 1.4399865865707397, "learning_rate": 0.00018557581913361372, "loss": 1.7006, "step": 1382 }, { "epoch": 0.198109153416416, "grad_norm": 1.4785088300704956, "learning_rate": 0.000185551804833676, "loss": 1.4817, "step": 1383 }, { "epoch": 0.1982523993697178, "grad_norm": 1.270953893661499, "learning_rate": 0.00018552777211655182, "loss": 1.4826, "step": 1384 }, { "epoch": 0.19839564532301962, "grad_norm": 1.1148852109909058, "learning_rate": 0.00018550372098741474, "loss": 1.6778, "step": 1385 }, { "epoch": 0.19853889127632143, "grad_norm": 1.291720986366272, "learning_rate": 0.00018547965145144244, "loss": 1.5262, "step": 1386 }, { "epoch": 0.19868213722962327, "grad_norm": 0.9664726257324219, "learning_rate": 0.00018545556351381643, "loss": 1.5423, "step": 1387 }, { "epoch": 0.1988253831829251, "grad_norm": 1.152539610862732, "learning_rate": 0.00018543145717972234, "loss": 1.5401, "step": 1388 }, { "epoch": 0.1989686291362269, "grad_norm": 1.0582084655761719, "learning_rate": 0.00018540733245434962, "loss": 1.2952, "step": 1389 }, { "epoch": 0.1991118750895287, "grad_norm": 1.1961158514022827, "learning_rate": 0.00018538318934289172, "loss": 1.4996, "step": 1390 }, { "epoch": 0.19925512104283055, "grad_norm": 1.396264910697937, "learning_rate": 0.00018535902785054605, "loss": 1.5477, "step": 1391 }, { "epoch": 0.19939836699613236, "grad_norm": 1.3177614212036133, "learning_rate": 0.00018533484798251398, "loss": 1.6327, "step": 1392 }, { "epoch": 0.19954161294943418, "grad_norm": 1.250672698020935, "learning_rate": 0.00018531064974400087, "loss": 1.4355, "step": 1393 }, { "epoch": 0.199684858902736, "grad_norm": 1.361416220664978, "learning_rate": 0.000185286433140216, "loss": 1.4801, "step": 1394 }, { "epoch": 0.19982810485603783, "grad_norm": 1.3723920583724976, "learning_rate": 0.00018526219817637256, "loss": 1.5312, "step": 1395 }, { "epoch": 0.19997135080933964, "grad_norm": 1.6589053869247437, "learning_rate": 0.00018523794485768774, "loss": 1.5444, "step": 1396 }, { "epoch": 0.20011459676264146, "grad_norm": 1.036561369895935, "learning_rate": 0.00018521367318938275, "loss": 1.6317, "step": 1397 }, { "epoch": 0.20025784271594327, "grad_norm": 1.1682296991348267, "learning_rate": 0.00018518938317668262, "loss": 1.6605, "step": 1398 }, { "epoch": 0.20040108866924508, "grad_norm": 1.135780930519104, "learning_rate": 0.00018516507482481637, "loss": 1.558, "step": 1399 }, { "epoch": 0.20054433462254692, "grad_norm": 1.2439090013504028, "learning_rate": 0.00018514074813901705, "loss": 1.6033, "step": 1400 }, { "epoch": 0.20068758057584873, "grad_norm": 1.1552304029464722, "learning_rate": 0.00018511640312452156, "loss": 1.5208, "step": 1401 }, { "epoch": 0.20083082652915055, "grad_norm": 1.5979442596435547, "learning_rate": 0.0001850920397865708, "loss": 1.3715, "step": 1402 }, { "epoch": 0.20097407248245236, "grad_norm": 1.4474765062332153, "learning_rate": 0.00018506765813040954, "loss": 1.4695, "step": 1403 }, { "epoch": 0.2011173184357542, "grad_norm": 1.1645184755325317, "learning_rate": 0.00018504325816128662, "loss": 1.3906, "step": 1404 }, { "epoch": 0.201260564389056, "grad_norm": 1.2833558320999146, "learning_rate": 0.00018501883988445466, "loss": 1.5119, "step": 1405 }, { "epoch": 0.20140381034235783, "grad_norm": 1.0259755849838257, "learning_rate": 0.00018499440330517039, "loss": 1.5412, "step": 1406 }, { "epoch": 0.20154705629565964, "grad_norm": 1.1516698598861694, "learning_rate": 0.00018496994842869438, "loss": 1.4942, "step": 1407 }, { "epoch": 0.20169030224896148, "grad_norm": 1.1080873012542725, "learning_rate": 0.00018494547526029114, "loss": 1.5325, "step": 1408 }, { "epoch": 0.2018335482022633, "grad_norm": 1.0680632591247559, "learning_rate": 0.00018492098380522916, "loss": 1.6113, "step": 1409 }, { "epoch": 0.2019767941555651, "grad_norm": 1.056185245513916, "learning_rate": 0.0001848964740687808, "loss": 1.585, "step": 1410 }, { "epoch": 0.20212004010886692, "grad_norm": 1.4105743169784546, "learning_rate": 0.00018487194605622248, "loss": 1.5146, "step": 1411 }, { "epoch": 0.20226328606216876, "grad_norm": 1.1395865678787231, "learning_rate": 0.00018484739977283444, "loss": 1.6744, "step": 1412 }, { "epoch": 0.20240653201547057, "grad_norm": 0.964874804019928, "learning_rate": 0.00018482283522390085, "loss": 1.6188, "step": 1413 }, { "epoch": 0.20254977796877238, "grad_norm": 1.5245773792266846, "learning_rate": 0.00018479825241470986, "loss": 1.5057, "step": 1414 }, { "epoch": 0.2026930239220742, "grad_norm": 1.4016399383544922, "learning_rate": 0.00018477365135055357, "loss": 1.469, "step": 1415 }, { "epoch": 0.202836269875376, "grad_norm": 1.0630128383636475, "learning_rate": 0.00018474903203672796, "loss": 1.554, "step": 1416 }, { "epoch": 0.20297951582867785, "grad_norm": 1.1299080848693848, "learning_rate": 0.00018472439447853297, "loss": 1.6916, "step": 1417 }, { "epoch": 0.20312276178197966, "grad_norm": 1.0836881399154663, "learning_rate": 0.00018469973868127246, "loss": 1.5345, "step": 1418 }, { "epoch": 0.20326600773528147, "grad_norm": 1.3375639915466309, "learning_rate": 0.0001846750646502542, "loss": 1.4818, "step": 1419 }, { "epoch": 0.20340925368858329, "grad_norm": 1.122757911682129, "learning_rate": 0.0001846503723907899, "loss": 1.4005, "step": 1420 }, { "epoch": 0.20355249964188513, "grad_norm": 1.1756877899169922, "learning_rate": 0.00018462566190819522, "loss": 1.703, "step": 1421 }, { "epoch": 0.20369574559518694, "grad_norm": 1.2671613693237305, "learning_rate": 0.00018460093320778968, "loss": 1.665, "step": 1422 }, { "epoch": 0.20383899154848875, "grad_norm": 1.4893039464950562, "learning_rate": 0.00018457618629489673, "loss": 1.5189, "step": 1423 }, { "epoch": 0.20398223750179056, "grad_norm": 1.146140217781067, "learning_rate": 0.00018455142117484386, "loss": 1.5304, "step": 1424 }, { "epoch": 0.2041254834550924, "grad_norm": 1.17046058177948, "learning_rate": 0.0001845266378529623, "loss": 1.7892, "step": 1425 }, { "epoch": 0.20426872940839422, "grad_norm": 1.1615841388702393, "learning_rate": 0.00018450183633458733, "loss": 1.5839, "step": 1426 }, { "epoch": 0.20441197536169603, "grad_norm": 1.7441776990890503, "learning_rate": 0.0001844770166250581, "loss": 1.8265, "step": 1427 }, { "epoch": 0.20455522131499784, "grad_norm": 1.0648633241653442, "learning_rate": 0.00018445217872971767, "loss": 1.6114, "step": 1428 }, { "epoch": 0.20469846726829968, "grad_norm": 1.2427387237548828, "learning_rate": 0.000184427322653913, "loss": 1.5721, "step": 1429 }, { "epoch": 0.2048417132216015, "grad_norm": 1.0513112545013428, "learning_rate": 0.00018440244840299506, "loss": 1.5165, "step": 1430 }, { "epoch": 0.2049849591749033, "grad_norm": 1.2392395734786987, "learning_rate": 0.00018437755598231856, "loss": 1.4319, "step": 1431 }, { "epoch": 0.20512820512820512, "grad_norm": 1.514569640159607, "learning_rate": 0.00018435264539724234, "loss": 1.4045, "step": 1432 }, { "epoch": 0.20527145108150693, "grad_norm": 1.050420880317688, "learning_rate": 0.00018432771665312893, "loss": 1.519, "step": 1433 }, { "epoch": 0.20541469703480877, "grad_norm": 1.2610220909118652, "learning_rate": 0.0001843027697553449, "loss": 1.4593, "step": 1434 }, { "epoch": 0.2055579429881106, "grad_norm": 1.1642498970031738, "learning_rate": 0.00018427780470926073, "loss": 1.5324, "step": 1435 }, { "epoch": 0.2057011889414124, "grad_norm": 1.2829734086990356, "learning_rate": 0.00018425282152025076, "loss": 1.5211, "step": 1436 }, { "epoch": 0.2058444348947142, "grad_norm": 1.4340356588363647, "learning_rate": 0.00018422782019369323, "loss": 1.3409, "step": 1437 }, { "epoch": 0.20598768084801605, "grad_norm": 1.5705552101135254, "learning_rate": 0.0001842028007349704, "loss": 1.4458, "step": 1438 }, { "epoch": 0.20613092680131787, "grad_norm": 0.8794950842857361, "learning_rate": 0.0001841777631494682, "loss": 1.7866, "step": 1439 }, { "epoch": 0.20627417275461968, "grad_norm": 1.0647470951080322, "learning_rate": 0.00018415270744257667, "loss": 1.4891, "step": 1440 }, { "epoch": 0.2064174187079215, "grad_norm": 1.3066332340240479, "learning_rate": 0.0001841276336196897, "loss": 1.5876, "step": 1441 }, { "epoch": 0.20656066466122333, "grad_norm": 1.3406274318695068, "learning_rate": 0.00018410254168620504, "loss": 1.4268, "step": 1442 }, { "epoch": 0.20670391061452514, "grad_norm": 1.0838818550109863, "learning_rate": 0.00018407743164752438, "loss": 1.4043, "step": 1443 }, { "epoch": 0.20684715656782696, "grad_norm": 1.149341344833374, "learning_rate": 0.0001840523035090533, "loss": 1.446, "step": 1444 }, { "epoch": 0.20699040252112877, "grad_norm": 1.3102495670318604, "learning_rate": 0.0001840271572762012, "loss": 1.6143, "step": 1445 }, { "epoch": 0.2071336484744306, "grad_norm": 1.3083823919296265, "learning_rate": 0.00018400199295438152, "loss": 1.3857, "step": 1446 }, { "epoch": 0.20727689442773242, "grad_norm": 1.2183609008789062, "learning_rate": 0.00018397681054901146, "loss": 1.3833, "step": 1447 }, { "epoch": 0.20742014038103423, "grad_norm": 1.3878487348556519, "learning_rate": 0.0001839516100655122, "loss": 1.5222, "step": 1448 }, { "epoch": 0.20756338633433605, "grad_norm": 1.1470175981521606, "learning_rate": 0.0001839263915093088, "loss": 1.5015, "step": 1449 }, { "epoch": 0.2077066322876379, "grad_norm": 1.291468620300293, "learning_rate": 0.00018390115488583014, "loss": 1.5208, "step": 1450 }, { "epoch": 0.2078498782409397, "grad_norm": 1.122896671295166, "learning_rate": 0.00018387590020050904, "loss": 1.4962, "step": 1451 }, { "epoch": 0.2079931241942415, "grad_norm": 1.886612057685852, "learning_rate": 0.00018385062745878225, "loss": 1.4617, "step": 1452 }, { "epoch": 0.20813637014754333, "grad_norm": 1.3602330684661865, "learning_rate": 0.0001838253366660904, "loss": 1.5006, "step": 1453 }, { "epoch": 0.20827961610084514, "grad_norm": 1.2793840169906616, "learning_rate": 0.00018380002782787783, "loss": 1.604, "step": 1454 }, { "epoch": 0.20842286205414698, "grad_norm": 1.1887341737747192, "learning_rate": 0.00018377470094959307, "loss": 1.5572, "step": 1455 }, { "epoch": 0.2085661080074488, "grad_norm": 1.2990611791610718, "learning_rate": 0.00018374935603668826, "loss": 1.6974, "step": 1456 }, { "epoch": 0.2087093539607506, "grad_norm": 1.1259386539459229, "learning_rate": 0.00018372399309461962, "loss": 1.645, "step": 1457 }, { "epoch": 0.20885259991405242, "grad_norm": 1.1804717779159546, "learning_rate": 0.00018369861212884706, "loss": 1.6709, "step": 1458 }, { "epoch": 0.20899584586735426, "grad_norm": 1.2612565755844116, "learning_rate": 0.00018367321314483452, "loss": 1.5517, "step": 1459 }, { "epoch": 0.20913909182065607, "grad_norm": 1.1413666009902954, "learning_rate": 0.0001836477961480498, "loss": 1.5906, "step": 1460 }, { "epoch": 0.20928233777395788, "grad_norm": 1.257929801940918, "learning_rate": 0.00018362236114396457, "loss": 1.7267, "step": 1461 }, { "epoch": 0.2094255837272597, "grad_norm": 1.2549214363098145, "learning_rate": 0.00018359690813805427, "loss": 1.4221, "step": 1462 }, { "epoch": 0.20956882968056154, "grad_norm": 1.2725675106048584, "learning_rate": 0.00018357143713579837, "loss": 1.5109, "step": 1463 }, { "epoch": 0.20971207563386335, "grad_norm": 1.3137381076812744, "learning_rate": 0.0001835459481426801, "loss": 1.3757, "step": 1464 }, { "epoch": 0.20985532158716516, "grad_norm": 1.3137515783309937, "learning_rate": 0.00018352044116418668, "loss": 1.5882, "step": 1465 }, { "epoch": 0.20999856754046697, "grad_norm": 1.1340367794036865, "learning_rate": 0.00018349491620580906, "loss": 1.5905, "step": 1466 }, { "epoch": 0.21014181349376881, "grad_norm": 1.4691225290298462, "learning_rate": 0.00018346937327304216, "loss": 1.3293, "step": 1467 }, { "epoch": 0.21028505944707063, "grad_norm": 1.1611536741256714, "learning_rate": 0.00018344381237138472, "loss": 1.4509, "step": 1468 }, { "epoch": 0.21042830540037244, "grad_norm": 1.252271056175232, "learning_rate": 0.00018341823350633942, "loss": 1.5841, "step": 1469 }, { "epoch": 0.21057155135367425, "grad_norm": 1.3695937395095825, "learning_rate": 0.00018339263668341275, "loss": 1.6684, "step": 1470 }, { "epoch": 0.21071479730697606, "grad_norm": 1.146183729171753, "learning_rate": 0.00018336702190811498, "loss": 1.6243, "step": 1471 }, { "epoch": 0.2108580432602779, "grad_norm": 1.1380293369293213, "learning_rate": 0.00018334138918596046, "loss": 1.5668, "step": 1472 }, { "epoch": 0.21100128921357972, "grad_norm": 1.1479898691177368, "learning_rate": 0.00018331573852246722, "loss": 1.4759, "step": 1473 }, { "epoch": 0.21114453516688153, "grad_norm": 0.9555863738059998, "learning_rate": 0.00018329006992315723, "loss": 1.4898, "step": 1474 }, { "epoch": 0.21128778112018334, "grad_norm": 0.9860863089561462, "learning_rate": 0.00018326438339355628, "loss": 1.3669, "step": 1475 }, { "epoch": 0.21143102707348518, "grad_norm": 1.2669332027435303, "learning_rate": 0.00018323867893919405, "loss": 1.3633, "step": 1476 }, { "epoch": 0.211574273026787, "grad_norm": 1.15382719039917, "learning_rate": 0.00018321295656560408, "loss": 1.5659, "step": 1477 }, { "epoch": 0.2117175189800888, "grad_norm": 1.1834615468978882, "learning_rate": 0.00018318721627832377, "loss": 1.4881, "step": 1478 }, { "epoch": 0.21186076493339062, "grad_norm": 1.2377066612243652, "learning_rate": 0.0001831614580828944, "loss": 1.5968, "step": 1479 }, { "epoch": 0.21200401088669246, "grad_norm": 1.5492805242538452, "learning_rate": 0.000183135681984861, "loss": 1.3493, "step": 1480 }, { "epoch": 0.21214725683999427, "grad_norm": 1.1334848403930664, "learning_rate": 0.00018310988798977255, "loss": 1.7096, "step": 1481 }, { "epoch": 0.2122905027932961, "grad_norm": 1.097018837928772, "learning_rate": 0.00018308407610318183, "loss": 1.3083, "step": 1482 }, { "epoch": 0.2124337487465979, "grad_norm": 1.1167300939559937, "learning_rate": 0.00018305824633064557, "loss": 1.4817, "step": 1483 }, { "epoch": 0.21257699469989974, "grad_norm": 1.1967469453811646, "learning_rate": 0.00018303239867772426, "loss": 1.4046, "step": 1484 }, { "epoch": 0.21272024065320155, "grad_norm": 1.3095366954803467, "learning_rate": 0.00018300653314998224, "loss": 1.5943, "step": 1485 }, { "epoch": 0.21286348660650337, "grad_norm": 1.5506595373153687, "learning_rate": 0.00018298064975298773, "loss": 1.4374, "step": 1486 }, { "epoch": 0.21300673255980518, "grad_norm": 1.0912269353866577, "learning_rate": 0.0001829547484923128, "loss": 1.6117, "step": 1487 }, { "epoch": 0.213149978513107, "grad_norm": 1.237838625907898, "learning_rate": 0.00018292882937353326, "loss": 1.4815, "step": 1488 }, { "epoch": 0.21329322446640883, "grad_norm": 1.545166254043579, "learning_rate": 0.000182902892402229, "loss": 1.4443, "step": 1489 }, { "epoch": 0.21343647041971064, "grad_norm": 1.1019178628921509, "learning_rate": 0.0001828769375839835, "loss": 1.3695, "step": 1490 }, { "epoch": 0.21357971637301246, "grad_norm": 1.0788493156433105, "learning_rate": 0.00018285096492438424, "loss": 1.6622, "step": 1491 }, { "epoch": 0.21372296232631427, "grad_norm": 1.4582874774932861, "learning_rate": 0.00018282497442902244, "loss": 1.4786, "step": 1492 }, { "epoch": 0.2138662082796161, "grad_norm": 1.1142699718475342, "learning_rate": 0.0001827989661034933, "loss": 1.4218, "step": 1493 }, { "epoch": 0.21400945423291792, "grad_norm": 1.1761949062347412, "learning_rate": 0.00018277293995339565, "loss": 1.6064, "step": 1494 }, { "epoch": 0.21415270018621974, "grad_norm": 1.1660184860229492, "learning_rate": 0.00018274689598433237, "loss": 1.5647, "step": 1495 }, { "epoch": 0.21429594613952155, "grad_norm": 1.3660680055618286, "learning_rate": 0.0001827208342019101, "loss": 1.5302, "step": 1496 }, { "epoch": 0.2144391920928234, "grad_norm": 0.960205078125, "learning_rate": 0.00018269475461173918, "loss": 1.5379, "step": 1497 }, { "epoch": 0.2145824380461252, "grad_norm": 1.1545640230178833, "learning_rate": 0.000182668657219434, "loss": 1.61, "step": 1498 }, { "epoch": 0.21472568399942701, "grad_norm": 1.2488517761230469, "learning_rate": 0.00018264254203061264, "loss": 1.3874, "step": 1499 }, { "epoch": 0.21486892995272883, "grad_norm": 1.0373151302337646, "learning_rate": 0.00018261640905089708, "loss": 1.6404, "step": 1500 }, { "epoch": 0.21501217590603067, "grad_norm": 1.083553671836853, "learning_rate": 0.00018259025828591308, "loss": 1.4594, "step": 1501 }, { "epoch": 0.21515542185933248, "grad_norm": 1.507617473602295, "learning_rate": 0.00018256408974129027, "loss": 1.5502, "step": 1502 }, { "epoch": 0.2152986678126343, "grad_norm": 1.1398086547851562, "learning_rate": 0.00018253790342266207, "loss": 1.6201, "step": 1503 }, { "epoch": 0.2154419137659361, "grad_norm": 1.2936040163040161, "learning_rate": 0.00018251169933566577, "loss": 1.5975, "step": 1504 }, { "epoch": 0.21558515971923792, "grad_norm": 1.0466448068618774, "learning_rate": 0.00018248547748594244, "loss": 1.4843, "step": 1505 }, { "epoch": 0.21572840567253976, "grad_norm": 1.195206642150879, "learning_rate": 0.00018245923787913704, "loss": 1.1496, "step": 1506 }, { "epoch": 0.21587165162584157, "grad_norm": 1.3630177974700928, "learning_rate": 0.00018243298052089823, "loss": 1.4804, "step": 1507 }, { "epoch": 0.21601489757914338, "grad_norm": 1.1443976163864136, "learning_rate": 0.00018240670541687864, "loss": 1.6369, "step": 1508 }, { "epoch": 0.2161581435324452, "grad_norm": 1.5311540365219116, "learning_rate": 0.00018238041257273463, "loss": 1.4739, "step": 1509 }, { "epoch": 0.21630138948574704, "grad_norm": 1.1093631982803345, "learning_rate": 0.00018235410199412636, "loss": 1.4604, "step": 1510 }, { "epoch": 0.21644463543904885, "grad_norm": 1.2489898204803467, "learning_rate": 0.0001823277736867179, "loss": 1.3782, "step": 1511 }, { "epoch": 0.21658788139235066, "grad_norm": 1.1612844467163086, "learning_rate": 0.00018230142765617705, "loss": 1.7413, "step": 1512 }, { "epoch": 0.21673112734565247, "grad_norm": 1.0746690034866333, "learning_rate": 0.0001822750639081755, "loss": 1.569, "step": 1513 }, { "epoch": 0.21687437329895431, "grad_norm": 1.423208475112915, "learning_rate": 0.0001822486824483886, "loss": 1.6716, "step": 1514 }, { "epoch": 0.21701761925225613, "grad_norm": 1.1169368028640747, "learning_rate": 0.0001822222832824958, "loss": 1.3822, "step": 1515 }, { "epoch": 0.21716086520555794, "grad_norm": 1.347452163696289, "learning_rate": 0.00018219586641618005, "loss": 1.4939, "step": 1516 }, { "epoch": 0.21730411115885975, "grad_norm": 1.5060954093933105, "learning_rate": 0.0001821694318551283, "loss": 1.5256, "step": 1517 }, { "epoch": 0.2174473571121616, "grad_norm": 1.2865513563156128, "learning_rate": 0.00018214297960503125, "loss": 1.561, "step": 1518 }, { "epoch": 0.2175906030654634, "grad_norm": 1.2649914026260376, "learning_rate": 0.00018211650967158344, "loss": 1.4917, "step": 1519 }, { "epoch": 0.21773384901876522, "grad_norm": 1.3209419250488281, "learning_rate": 0.00018209002206048315, "loss": 1.6617, "step": 1520 }, { "epoch": 0.21787709497206703, "grad_norm": 1.1756374835968018, "learning_rate": 0.0001820635167774325, "loss": 1.3885, "step": 1521 }, { "epoch": 0.21802034092536887, "grad_norm": 1.0940485000610352, "learning_rate": 0.00018203699382813746, "loss": 1.6826, "step": 1522 }, { "epoch": 0.21816358687867068, "grad_norm": 1.1051404476165771, "learning_rate": 0.00018201045321830775, "loss": 1.3603, "step": 1523 }, { "epoch": 0.2183068328319725, "grad_norm": 1.2450709342956543, "learning_rate": 0.0001819838949536569, "loss": 1.628, "step": 1524 }, { "epoch": 0.2184500787852743, "grad_norm": 1.1604392528533936, "learning_rate": 0.00018195731903990225, "loss": 1.4947, "step": 1525 }, { "epoch": 0.21859332473857612, "grad_norm": 1.123118281364441, "learning_rate": 0.00018193072548276494, "loss": 1.5528, "step": 1526 }, { "epoch": 0.21873657069187796, "grad_norm": 1.1758928298950195, "learning_rate": 0.00018190411428796991, "loss": 1.4828, "step": 1527 }, { "epoch": 0.21887981664517978, "grad_norm": 1.14836585521698, "learning_rate": 0.0001818774854612459, "loss": 1.6918, "step": 1528 }, { "epoch": 0.2190230625984816, "grad_norm": 1.1046864986419678, "learning_rate": 0.00018185083900832544, "loss": 1.5385, "step": 1529 }, { "epoch": 0.2191663085517834, "grad_norm": 1.1961406469345093, "learning_rate": 0.00018182417493494478, "loss": 1.5787, "step": 1530 }, { "epoch": 0.21930955450508524, "grad_norm": 1.1146355867385864, "learning_rate": 0.00018179749324684412, "loss": 1.6995, "step": 1531 }, { "epoch": 0.21945280045838705, "grad_norm": 1.060309886932373, "learning_rate": 0.00018177079394976736, "loss": 1.4269, "step": 1532 }, { "epoch": 0.21959604641168887, "grad_norm": 1.0514110326766968, "learning_rate": 0.00018174407704946212, "loss": 1.5451, "step": 1533 }, { "epoch": 0.21973929236499068, "grad_norm": 1.3559415340423584, "learning_rate": 0.00018171734255167997, "loss": 1.3571, "step": 1534 }, { "epoch": 0.21988253831829252, "grad_norm": 1.224799394607544, "learning_rate": 0.00018169059046217617, "loss": 1.5162, "step": 1535 }, { "epoch": 0.22002578427159433, "grad_norm": 1.1984279155731201, "learning_rate": 0.00018166382078670977, "loss": 1.2535, "step": 1536 }, { "epoch": 0.22016903022489615, "grad_norm": 1.5035969018936157, "learning_rate": 0.0001816370335310436, "loss": 1.4727, "step": 1537 }, { "epoch": 0.22031227617819796, "grad_norm": 1.2182730436325073, "learning_rate": 0.00018161022870094432, "loss": 1.626, "step": 1538 }, { "epoch": 0.2204555221314998, "grad_norm": 1.2001169919967651, "learning_rate": 0.00018158340630218235, "loss": 1.6117, "step": 1539 }, { "epoch": 0.2205987680848016, "grad_norm": 1.2035704851150513, "learning_rate": 0.0001815565663405319, "loss": 1.4128, "step": 1540 }, { "epoch": 0.22074201403810342, "grad_norm": 1.2661808729171753, "learning_rate": 0.0001815297088217709, "loss": 1.5654, "step": 1541 }, { "epoch": 0.22088525999140524, "grad_norm": 1.1710599660873413, "learning_rate": 0.00018150283375168114, "loss": 1.462, "step": 1542 }, { "epoch": 0.22102850594470705, "grad_norm": 1.1481469869613647, "learning_rate": 0.00018147594113604817, "loss": 1.3322, "step": 1543 }, { "epoch": 0.2211717518980089, "grad_norm": 1.0221550464630127, "learning_rate": 0.00018144903098066126, "loss": 1.5989, "step": 1544 }, { "epoch": 0.2213149978513107, "grad_norm": 1.0604640245437622, "learning_rate": 0.00018142210329131358, "loss": 1.3891, "step": 1545 }, { "epoch": 0.22145824380461251, "grad_norm": 1.3637282848358154, "learning_rate": 0.0001813951580738019, "loss": 1.4772, "step": 1546 }, { "epoch": 0.22160148975791433, "grad_norm": 1.3984220027923584, "learning_rate": 0.00018136819533392693, "loss": 1.3779, "step": 1547 }, { "epoch": 0.22174473571121617, "grad_norm": 1.1531269550323486, "learning_rate": 0.00018134121507749304, "loss": 1.4907, "step": 1548 }, { "epoch": 0.22188798166451798, "grad_norm": 1.262723684310913, "learning_rate": 0.0001813142173103084, "loss": 1.5794, "step": 1549 }, { "epoch": 0.2220312276178198, "grad_norm": 1.372413992881775, "learning_rate": 0.00018128720203818504, "loss": 1.591, "step": 1550 }, { "epoch": 0.2221744735711216, "grad_norm": 1.2786822319030762, "learning_rate": 0.0001812601692669386, "loss": 1.6051, "step": 1551 }, { "epoch": 0.22231771952442345, "grad_norm": 1.2073897123336792, "learning_rate": 0.0001812331190023886, "loss": 1.6959, "step": 1552 }, { "epoch": 0.22246096547772526, "grad_norm": 1.0315661430358887, "learning_rate": 0.00018120605125035829, "loss": 1.55, "step": 1553 }, { "epoch": 0.22260421143102707, "grad_norm": 1.2762595415115356, "learning_rate": 0.00018117896601667469, "loss": 1.5479, "step": 1554 }, { "epoch": 0.22274745738432888, "grad_norm": 1.0317778587341309, "learning_rate": 0.00018115186330716854, "loss": 1.5641, "step": 1555 }, { "epoch": 0.22289070333763072, "grad_norm": 1.1282223463058472, "learning_rate": 0.00018112474312767445, "loss": 1.4049, "step": 1556 }, { "epoch": 0.22303394929093254, "grad_norm": 1.262637972831726, "learning_rate": 0.00018109760548403066, "loss": 1.6847, "step": 1557 }, { "epoch": 0.22317719524423435, "grad_norm": 1.115439534187317, "learning_rate": 0.00018107045038207931, "loss": 1.5362, "step": 1558 }, { "epoch": 0.22332044119753616, "grad_norm": 1.4673593044281006, "learning_rate": 0.00018104327782766615, "loss": 1.3601, "step": 1559 }, { "epoch": 0.22346368715083798, "grad_norm": 1.3072248697280884, "learning_rate": 0.00018101608782664078, "loss": 1.5443, "step": 1560 }, { "epoch": 0.22360693310413982, "grad_norm": 1.3873815536499023, "learning_rate": 0.00018098888038485652, "loss": 1.3851, "step": 1561 }, { "epoch": 0.22375017905744163, "grad_norm": 1.455453872680664, "learning_rate": 0.0001809616555081705, "loss": 1.555, "step": 1562 }, { "epoch": 0.22389342501074344, "grad_norm": 1.2670179605484009, "learning_rate": 0.00018093441320244353, "loss": 1.4691, "step": 1563 }, { "epoch": 0.22403667096404525, "grad_norm": 1.1840388774871826, "learning_rate": 0.00018090715347354023, "loss": 1.4754, "step": 1564 }, { "epoch": 0.2241799169173471, "grad_norm": 1.1661396026611328, "learning_rate": 0.0001808798763273289, "loss": 1.4071, "step": 1565 }, { "epoch": 0.2243231628706489, "grad_norm": 1.1518869400024414, "learning_rate": 0.0001808525817696817, "loss": 1.6451, "step": 1566 }, { "epoch": 0.22446640882395072, "grad_norm": 1.1735764741897583, "learning_rate": 0.0001808252698064744, "loss": 1.6268, "step": 1567 }, { "epoch": 0.22460965477725253, "grad_norm": 1.5874416828155518, "learning_rate": 0.00018079794044358668, "loss": 1.4834, "step": 1568 }, { "epoch": 0.22475290073055437, "grad_norm": 1.0743379592895508, "learning_rate": 0.00018077059368690174, "loss": 1.6421, "step": 1569 }, { "epoch": 0.22489614668385619, "grad_norm": 1.1248879432678223, "learning_rate": 0.00018074322954230677, "loss": 1.4636, "step": 1570 }, { "epoch": 0.225039392637158, "grad_norm": 1.206067442893982, "learning_rate": 0.0001807158480156926, "loss": 1.5657, "step": 1571 }, { "epoch": 0.2251826385904598, "grad_norm": 1.5736825466156006, "learning_rate": 0.00018068844911295372, "loss": 1.5803, "step": 1572 }, { "epoch": 0.22532588454376165, "grad_norm": 1.0210436582565308, "learning_rate": 0.00018066103283998852, "loss": 1.4687, "step": 1573 }, { "epoch": 0.22546913049706346, "grad_norm": 1.370393991470337, "learning_rate": 0.00018063359920269896, "loss": 1.4709, "step": 1574 }, { "epoch": 0.22561237645036528, "grad_norm": 1.1733421087265015, "learning_rate": 0.00018060614820699085, "loss": 1.5174, "step": 1575 }, { "epoch": 0.2257556224036671, "grad_norm": 1.3706817626953125, "learning_rate": 0.00018057867985877377, "loss": 1.5157, "step": 1576 }, { "epoch": 0.22589886835696893, "grad_norm": 1.1381696462631226, "learning_rate": 0.0001805511941639609, "loss": 1.6675, "step": 1577 }, { "epoch": 0.22604211431027074, "grad_norm": 1.1196262836456299, "learning_rate": 0.0001805236911284693, "loss": 1.4569, "step": 1578 }, { "epoch": 0.22618536026357255, "grad_norm": 1.0801070928573608, "learning_rate": 0.00018049617075821962, "loss": 1.3081, "step": 1579 }, { "epoch": 0.22632860621687437, "grad_norm": 1.0941091775894165, "learning_rate": 0.00018046863305913632, "loss": 1.4623, "step": 1580 }, { "epoch": 0.22647185217017618, "grad_norm": 1.41851806640625, "learning_rate": 0.00018044107803714764, "loss": 1.6527, "step": 1581 }, { "epoch": 0.22661509812347802, "grad_norm": 1.2155038118362427, "learning_rate": 0.00018041350569818546, "loss": 1.5218, "step": 1582 }, { "epoch": 0.22675834407677983, "grad_norm": 1.4116519689559937, "learning_rate": 0.00018038591604818544, "loss": 1.3852, "step": 1583 }, { "epoch": 0.22690159003008165, "grad_norm": 1.3979288339614868, "learning_rate": 0.00018035830909308694, "loss": 1.5874, "step": 1584 }, { "epoch": 0.22704483598338346, "grad_norm": 1.2356114387512207, "learning_rate": 0.00018033068483883305, "loss": 1.5841, "step": 1585 }, { "epoch": 0.2271880819366853, "grad_norm": 1.0980898141860962, "learning_rate": 0.00018030304329137055, "loss": 1.4187, "step": 1586 }, { "epoch": 0.2273313278899871, "grad_norm": 1.2686175107955933, "learning_rate": 0.00018027538445665006, "loss": 1.3722, "step": 1587 }, { "epoch": 0.22747457384328892, "grad_norm": 1.0811446905136108, "learning_rate": 0.00018024770834062582, "loss": 1.5286, "step": 1588 }, { "epoch": 0.22761781979659074, "grad_norm": 1.1239668130874634, "learning_rate": 0.00018022001494925576, "loss": 1.5191, "step": 1589 }, { "epoch": 0.22776106574989258, "grad_norm": 1.0656206607818604, "learning_rate": 0.00018019230428850165, "loss": 1.7238, "step": 1590 }, { "epoch": 0.2279043117031944, "grad_norm": 1.3077001571655273, "learning_rate": 0.00018016457636432884, "loss": 1.5561, "step": 1591 }, { "epoch": 0.2280475576564962, "grad_norm": 1.1455917358398438, "learning_rate": 0.00018013683118270652, "loss": 1.5712, "step": 1592 }, { "epoch": 0.22819080360979802, "grad_norm": 1.208787441253662, "learning_rate": 0.00018010906874960754, "loss": 1.4216, "step": 1593 }, { "epoch": 0.22833404956309986, "grad_norm": 1.0071102380752563, "learning_rate": 0.00018008128907100844, "loss": 1.5824, "step": 1594 }, { "epoch": 0.22847729551640167, "grad_norm": 1.1912972927093506, "learning_rate": 0.0001800534921528895, "loss": 1.6535, "step": 1595 }, { "epoch": 0.22862054146970348, "grad_norm": 1.4216176271438599, "learning_rate": 0.00018002567800123474, "loss": 1.4237, "step": 1596 }, { "epoch": 0.2287637874230053, "grad_norm": 1.387319803237915, "learning_rate": 0.00017999784662203178, "loss": 1.2825, "step": 1597 }, { "epoch": 0.2289070333763071, "grad_norm": 1.398067831993103, "learning_rate": 0.00017996999802127214, "loss": 1.5442, "step": 1598 }, { "epoch": 0.22905027932960895, "grad_norm": 1.1122944355010986, "learning_rate": 0.00017994213220495084, "loss": 1.6582, "step": 1599 }, { "epoch": 0.22919352528291076, "grad_norm": 1.4073266983032227, "learning_rate": 0.00017991424917906677, "loss": 1.3174, "step": 1600 }, { "epoch": 0.22933677123621257, "grad_norm": 1.1288211345672607, "learning_rate": 0.0001798863489496224, "loss": 1.5664, "step": 1601 }, { "epoch": 0.22948001718951438, "grad_norm": 1.2065292596817017, "learning_rate": 0.00017985843152262397, "loss": 1.538, "step": 1602 }, { "epoch": 0.22962326314281623, "grad_norm": 1.2226641178131104, "learning_rate": 0.00017983049690408146, "loss": 1.5167, "step": 1603 }, { "epoch": 0.22976650909611804, "grad_norm": 1.327951431274414, "learning_rate": 0.00017980254510000844, "loss": 1.5346, "step": 1604 }, { "epoch": 0.22990975504941985, "grad_norm": 1.0875424146652222, "learning_rate": 0.00017977457611642226, "loss": 1.5092, "step": 1605 }, { "epoch": 0.23005300100272166, "grad_norm": 1.277587652206421, "learning_rate": 0.00017974658995934396, "loss": 1.4052, "step": 1606 }, { "epoch": 0.2301962469560235, "grad_norm": 1.008635401725769, "learning_rate": 0.00017971858663479826, "loss": 1.5983, "step": 1607 }, { "epoch": 0.23033949290932532, "grad_norm": 1.410136342048645, "learning_rate": 0.0001796905661488136, "loss": 1.4776, "step": 1608 }, { "epoch": 0.23048273886262713, "grad_norm": 1.2685760259628296, "learning_rate": 0.0001796625285074221, "loss": 1.3976, "step": 1609 }, { "epoch": 0.23062598481592894, "grad_norm": 1.0443456172943115, "learning_rate": 0.0001796344737166595, "loss": 1.6023, "step": 1610 }, { "epoch": 0.23076923076923078, "grad_norm": 1.1500216722488403, "learning_rate": 0.0001796064017825654, "loss": 1.5464, "step": 1611 }, { "epoch": 0.2309124767225326, "grad_norm": 1.2528773546218872, "learning_rate": 0.00017957831271118294, "loss": 1.6987, "step": 1612 }, { "epoch": 0.2310557226758344, "grad_norm": 1.1374362707138062, "learning_rate": 0.000179550206508559, "loss": 1.607, "step": 1613 }, { "epoch": 0.23119896862913622, "grad_norm": 1.2306231260299683, "learning_rate": 0.00017952208318074416, "loss": 1.4653, "step": 1614 }, { "epoch": 0.23134221458243803, "grad_norm": 1.2008354663848877, "learning_rate": 0.0001794939427337927, "loss": 1.501, "step": 1615 }, { "epoch": 0.23148546053573987, "grad_norm": 1.3857522010803223, "learning_rate": 0.0001794657851737625, "loss": 1.3772, "step": 1616 }, { "epoch": 0.23162870648904169, "grad_norm": 1.1792274713516235, "learning_rate": 0.00017943761050671526, "loss": 1.4868, "step": 1617 }, { "epoch": 0.2317719524423435, "grad_norm": 1.3106101751327515, "learning_rate": 0.00017940941873871626, "loss": 1.495, "step": 1618 }, { "epoch": 0.2319151983956453, "grad_norm": 1.1484472751617432, "learning_rate": 0.00017938120987583445, "loss": 1.6104, "step": 1619 }, { "epoch": 0.23205844434894715, "grad_norm": 1.16348397731781, "learning_rate": 0.00017935298392414257, "loss": 1.5255, "step": 1620 }, { "epoch": 0.23220169030224896, "grad_norm": 1.138157606124878, "learning_rate": 0.00017932474088971692, "loss": 1.5399, "step": 1621 }, { "epoch": 0.23234493625555078, "grad_norm": 1.0980589389801025, "learning_rate": 0.0001792964807786375, "loss": 1.5398, "step": 1622 }, { "epoch": 0.2324881822088526, "grad_norm": 1.0768204927444458, "learning_rate": 0.0001792682035969881, "loss": 1.5361, "step": 1623 }, { "epoch": 0.23263142816215443, "grad_norm": 0.9410218000411987, "learning_rate": 0.00017923990935085602, "loss": 1.5161, "step": 1624 }, { "epoch": 0.23277467411545624, "grad_norm": 1.1880786418914795, "learning_rate": 0.00017921159804633238, "loss": 1.315, "step": 1625 }, { "epoch": 0.23291792006875806, "grad_norm": 1.1521860361099243, "learning_rate": 0.00017918326968951182, "loss": 1.5468, "step": 1626 }, { "epoch": 0.23306116602205987, "grad_norm": 1.2211812734603882, "learning_rate": 0.00017915492428649277, "loss": 1.7284, "step": 1627 }, { "epoch": 0.2332044119753617, "grad_norm": 1.0255587100982666, "learning_rate": 0.00017912656184337734, "loss": 1.7276, "step": 1628 }, { "epoch": 0.23334765792866352, "grad_norm": 1.038635492324829, "learning_rate": 0.00017909818236627124, "loss": 1.5494, "step": 1629 }, { "epoch": 0.23349090388196533, "grad_norm": 1.132211685180664, "learning_rate": 0.0001790697858612838, "loss": 1.411, "step": 1630 }, { "epoch": 0.23363414983526715, "grad_norm": 1.2159178256988525, "learning_rate": 0.00017904137233452815, "loss": 1.46, "step": 1631 }, { "epoch": 0.23377739578856896, "grad_norm": 1.3370416164398193, "learning_rate": 0.000179012941792121, "loss": 1.4356, "step": 1632 }, { "epoch": 0.2339206417418708, "grad_norm": 1.1122897863388062, "learning_rate": 0.00017898449424018278, "loss": 1.4177, "step": 1633 }, { "epoch": 0.2340638876951726, "grad_norm": 1.1317368745803833, "learning_rate": 0.0001789560296848375, "loss": 1.4307, "step": 1634 }, { "epoch": 0.23420713364847442, "grad_norm": 1.1846390962600708, "learning_rate": 0.00017892754813221288, "loss": 1.389, "step": 1635 }, { "epoch": 0.23435037960177624, "grad_norm": 1.259944200515747, "learning_rate": 0.0001788990495884403, "loss": 1.496, "step": 1636 }, { "epoch": 0.23449362555507808, "grad_norm": 1.4734100103378296, "learning_rate": 0.00017887053405965482, "loss": 1.4312, "step": 1637 }, { "epoch": 0.2346368715083799, "grad_norm": 1.0750943422317505, "learning_rate": 0.00017884200155199507, "loss": 1.3276, "step": 1638 }, { "epoch": 0.2347801174616817, "grad_norm": 1.1855219602584839, "learning_rate": 0.00017881345207160343, "loss": 1.6613, "step": 1639 }, { "epoch": 0.23492336341498352, "grad_norm": 0.9636190533638, "learning_rate": 0.00017878488562462588, "loss": 1.5859, "step": 1640 }, { "epoch": 0.23506660936828536, "grad_norm": 1.403967022895813, "learning_rate": 0.00017875630221721208, "loss": 1.3911, "step": 1641 }, { "epoch": 0.23520985532158717, "grad_norm": 0.9366437792778015, "learning_rate": 0.00017872770185551535, "loss": 1.5734, "step": 1642 }, { "epoch": 0.23535310127488898, "grad_norm": 1.1883364915847778, "learning_rate": 0.00017869908454569257, "loss": 1.3841, "step": 1643 }, { "epoch": 0.2354963472281908, "grad_norm": 1.170893907546997, "learning_rate": 0.00017867045029390445, "loss": 1.5987, "step": 1644 }, { "epoch": 0.23563959318149263, "grad_norm": 1.1183254718780518, "learning_rate": 0.00017864179910631514, "loss": 1.545, "step": 1645 }, { "epoch": 0.23578283913479445, "grad_norm": 1.2897900342941284, "learning_rate": 0.00017861313098909256, "loss": 1.3466, "step": 1646 }, { "epoch": 0.23592608508809626, "grad_norm": 1.139197826385498, "learning_rate": 0.00017858444594840827, "loss": 1.5891, "step": 1647 }, { "epoch": 0.23606933104139807, "grad_norm": 1.2418596744537354, "learning_rate": 0.00017855574399043743, "loss": 1.5439, "step": 1648 }, { "epoch": 0.2362125769946999, "grad_norm": 1.104661226272583, "learning_rate": 0.00017852702512135884, "loss": 1.5013, "step": 1649 }, { "epoch": 0.23635582294800173, "grad_norm": 1.2875605821609497, "learning_rate": 0.00017849828934735497, "loss": 1.5453, "step": 1650 }, { "epoch": 0.23649906890130354, "grad_norm": 1.6004868745803833, "learning_rate": 0.00017846953667461198, "loss": 1.3004, "step": 1651 }, { "epoch": 0.23664231485460535, "grad_norm": 1.215222954750061, "learning_rate": 0.00017844076710931954, "loss": 1.2455, "step": 1652 }, { "epoch": 0.23678556080790716, "grad_norm": 1.2152013778686523, "learning_rate": 0.00017841198065767107, "loss": 1.4268, "step": 1653 }, { "epoch": 0.236928806761209, "grad_norm": 1.139962077140808, "learning_rate": 0.00017838317732586352, "loss": 1.5386, "step": 1654 }, { "epoch": 0.23707205271451082, "grad_norm": 0.9816656708717346, "learning_rate": 0.0001783543571200976, "loss": 1.4273, "step": 1655 }, { "epoch": 0.23721529866781263, "grad_norm": 1.1998710632324219, "learning_rate": 0.00017832552004657756, "loss": 1.45, "step": 1656 }, { "epoch": 0.23735854462111444, "grad_norm": 1.3308093547821045, "learning_rate": 0.0001782966661115113, "loss": 1.5399, "step": 1657 }, { "epoch": 0.23750179057441628, "grad_norm": 1.0545663833618164, "learning_rate": 0.00017826779532111037, "loss": 1.6803, "step": 1658 }, { "epoch": 0.2376450365277181, "grad_norm": 1.4090311527252197, "learning_rate": 0.00017823890768158996, "loss": 1.3388, "step": 1659 }, { "epoch": 0.2377882824810199, "grad_norm": 1.0986031293869019, "learning_rate": 0.0001782100031991688, "loss": 1.5565, "step": 1660 }, { "epoch": 0.23793152843432172, "grad_norm": 1.23817777633667, "learning_rate": 0.00017818108188006937, "loss": 1.3763, "step": 1661 }, { "epoch": 0.23807477438762356, "grad_norm": 1.0550546646118164, "learning_rate": 0.0001781521437305177, "loss": 1.5913, "step": 1662 }, { "epoch": 0.23821802034092537, "grad_norm": 1.0696789026260376, "learning_rate": 0.00017812318875674342, "loss": 1.4448, "step": 1663 }, { "epoch": 0.2383612662942272, "grad_norm": 0.9307172894477844, "learning_rate": 0.00017809421696497987, "loss": 1.5858, "step": 1664 }, { "epoch": 0.238504512247529, "grad_norm": 1.2478545904159546, "learning_rate": 0.00017806522836146395, "loss": 1.4587, "step": 1665 }, { "epoch": 0.23864775820083084, "grad_norm": 1.224277377128601, "learning_rate": 0.00017803622295243615, "loss": 1.3342, "step": 1666 }, { "epoch": 0.23879100415413265, "grad_norm": 1.021478533744812, "learning_rate": 0.00017800720074414068, "loss": 1.6131, "step": 1667 }, { "epoch": 0.23893425010743446, "grad_norm": 1.0162409543991089, "learning_rate": 0.00017797816174282524, "loss": 1.6935, "step": 1668 }, { "epoch": 0.23907749606073628, "grad_norm": 1.1760752201080322, "learning_rate": 0.00017794910595474126, "loss": 1.3912, "step": 1669 }, { "epoch": 0.2392207420140381, "grad_norm": 1.0430537462234497, "learning_rate": 0.00017792003338614368, "loss": 1.3529, "step": 1670 }, { "epoch": 0.23936398796733993, "grad_norm": 1.2220804691314697, "learning_rate": 0.00017789094404329115, "loss": 1.5991, "step": 1671 }, { "epoch": 0.23950723392064174, "grad_norm": 1.2312926054000854, "learning_rate": 0.0001778618379324459, "loss": 1.4976, "step": 1672 }, { "epoch": 0.23965047987394356, "grad_norm": 1.2138832807540894, "learning_rate": 0.00017783271505987367, "loss": 1.5458, "step": 1673 }, { "epoch": 0.23979372582724537, "grad_norm": 1.0128521919250488, "learning_rate": 0.00017780357543184397, "loss": 1.5235, "step": 1674 }, { "epoch": 0.2399369717805472, "grad_norm": 1.0863696336746216, "learning_rate": 0.00017777441905462982, "loss": 1.646, "step": 1675 }, { "epoch": 0.24008021773384902, "grad_norm": 1.2562332153320312, "learning_rate": 0.00017774524593450785, "loss": 1.5221, "step": 1676 }, { "epoch": 0.24022346368715083, "grad_norm": 1.2026699781417847, "learning_rate": 0.00017771605607775834, "loss": 1.4357, "step": 1677 }, { "epoch": 0.24036670964045265, "grad_norm": 1.2922749519348145, "learning_rate": 0.00017768684949066515, "loss": 1.556, "step": 1678 }, { "epoch": 0.2405099555937545, "grad_norm": 1.2074483633041382, "learning_rate": 0.0001776576261795157, "loss": 1.5278, "step": 1679 }, { "epoch": 0.2406532015470563, "grad_norm": 1.2819236516952515, "learning_rate": 0.00017762838615060104, "loss": 1.5061, "step": 1680 }, { "epoch": 0.2407964475003581, "grad_norm": 1.1819390058517456, "learning_rate": 0.00017759912941021585, "loss": 1.3712, "step": 1681 }, { "epoch": 0.24093969345365993, "grad_norm": 1.1596397161483765, "learning_rate": 0.0001775698559646584, "loss": 1.5251, "step": 1682 }, { "epoch": 0.24108293940696177, "grad_norm": 1.276223063468933, "learning_rate": 0.00017754056582023052, "loss": 1.5252, "step": 1683 }, { "epoch": 0.24122618536026358, "grad_norm": 1.2092808485031128, "learning_rate": 0.00017751125898323765, "loss": 1.4462, "step": 1684 }, { "epoch": 0.2413694313135654, "grad_norm": 1.1401129961013794, "learning_rate": 0.00017748193545998883, "loss": 1.517, "step": 1685 }, { "epoch": 0.2415126772668672, "grad_norm": 1.019573450088501, "learning_rate": 0.00017745259525679666, "loss": 1.6774, "step": 1686 }, { "epoch": 0.24165592322016902, "grad_norm": 1.0991650819778442, "learning_rate": 0.0001774232383799774, "loss": 1.5439, "step": 1687 }, { "epoch": 0.24179916917347086, "grad_norm": 1.1929291486740112, "learning_rate": 0.00017739386483585083, "loss": 1.5512, "step": 1688 }, { "epoch": 0.24194241512677267, "grad_norm": 1.2396111488342285, "learning_rate": 0.00017736447463074037, "loss": 1.4041, "step": 1689 }, { "epoch": 0.24208566108007448, "grad_norm": 1.1199657917022705, "learning_rate": 0.000177335067770973, "loss": 1.5615, "step": 1690 }, { "epoch": 0.2422289070333763, "grad_norm": 1.2150777578353882, "learning_rate": 0.0001773056442628793, "loss": 1.5394, "step": 1691 }, { "epoch": 0.24237215298667814, "grad_norm": 1.1789854764938354, "learning_rate": 0.00017727620411279337, "loss": 1.5883, "step": 1692 }, { "epoch": 0.24251539893997995, "grad_norm": 1.1000840663909912, "learning_rate": 0.00017724674732705301, "loss": 1.5895, "step": 1693 }, { "epoch": 0.24265864489328176, "grad_norm": 1.2763997316360474, "learning_rate": 0.0001772172739119995, "loss": 1.6236, "step": 1694 }, { "epoch": 0.24280189084658357, "grad_norm": 1.0187008380889893, "learning_rate": 0.00017718778387397775, "loss": 1.486, "step": 1695 }, { "epoch": 0.24294513679988541, "grad_norm": 1.150473713874817, "learning_rate": 0.00017715827721933623, "loss": 1.4881, "step": 1696 }, { "epoch": 0.24308838275318723, "grad_norm": 1.050354242324829, "learning_rate": 0.00017712875395442702, "loss": 1.6785, "step": 1697 }, { "epoch": 0.24323162870648904, "grad_norm": 1.1723244190216064, "learning_rate": 0.0001770992140856057, "loss": 1.3832, "step": 1698 }, { "epoch": 0.24337487465979085, "grad_norm": 1.0287948846817017, "learning_rate": 0.0001770696576192315, "loss": 1.4747, "step": 1699 }, { "epoch": 0.2435181206130927, "grad_norm": 1.2437360286712646, "learning_rate": 0.00017704008456166718, "loss": 1.5576, "step": 1700 }, { "epoch": 0.2436613665663945, "grad_norm": 1.2614781856536865, "learning_rate": 0.00017701049491927916, "loss": 1.5181, "step": 1701 }, { "epoch": 0.24380461251969632, "grad_norm": 1.356305718421936, "learning_rate": 0.00017698088869843722, "loss": 1.2966, "step": 1702 }, { "epoch": 0.24394785847299813, "grad_norm": 1.1504061222076416, "learning_rate": 0.00017695126590551495, "loss": 1.5534, "step": 1703 }, { "epoch": 0.24409110442629997, "grad_norm": 1.174885630607605, "learning_rate": 0.00017692162654688943, "loss": 1.5194, "step": 1704 }, { "epoch": 0.24423435037960178, "grad_norm": 1.1576462984085083, "learning_rate": 0.00017689197062894116, "loss": 1.5961, "step": 1705 }, { "epoch": 0.2443775963329036, "grad_norm": 1.4788256883621216, "learning_rate": 0.00017686229815805441, "loss": 1.4422, "step": 1706 }, { "epoch": 0.2445208422862054, "grad_norm": 1.0189123153686523, "learning_rate": 0.00017683260914061694, "loss": 1.5468, "step": 1707 }, { "epoch": 0.24466408823950722, "grad_norm": 1.221017837524414, "learning_rate": 0.00017680290358302, "loss": 1.5931, "step": 1708 }, { "epoch": 0.24480733419280906, "grad_norm": 1.057818055152893, "learning_rate": 0.00017677318149165853, "loss": 1.5742, "step": 1709 }, { "epoch": 0.24495058014611087, "grad_norm": 1.3094794750213623, "learning_rate": 0.00017674344287293089, "loss": 1.6952, "step": 1710 }, { "epoch": 0.2450938260994127, "grad_norm": 1.2983174324035645, "learning_rate": 0.00017671368773323912, "loss": 1.5533, "step": 1711 }, { "epoch": 0.2452370720527145, "grad_norm": 1.2301416397094727, "learning_rate": 0.00017668391607898878, "loss": 1.6913, "step": 1712 }, { "epoch": 0.24538031800601634, "grad_norm": 1.3136086463928223, "learning_rate": 0.00017665412791658888, "loss": 1.4652, "step": 1713 }, { "epoch": 0.24552356395931815, "grad_norm": 1.0584815740585327, "learning_rate": 0.00017662432325245218, "loss": 1.5523, "step": 1714 }, { "epoch": 0.24566680991261997, "grad_norm": 1.1548343896865845, "learning_rate": 0.00017659450209299484, "loss": 1.4768, "step": 1715 }, { "epoch": 0.24581005586592178, "grad_norm": 1.2268909215927124, "learning_rate": 0.00017656466444463658, "loss": 1.4709, "step": 1716 }, { "epoch": 0.24595330181922362, "grad_norm": 1.257816195487976, "learning_rate": 0.0001765348103138008, "loss": 1.4373, "step": 1717 }, { "epoch": 0.24609654777252543, "grad_norm": 1.1751478910446167, "learning_rate": 0.0001765049397069143, "loss": 1.4415, "step": 1718 }, { "epoch": 0.24623979372582724, "grad_norm": 1.1696817874908447, "learning_rate": 0.00017647505263040743, "loss": 1.5036, "step": 1719 }, { "epoch": 0.24638303967912906, "grad_norm": 1.1908512115478516, "learning_rate": 0.00017644514909071423, "loss": 1.5009, "step": 1720 }, { "epoch": 0.2465262856324309, "grad_norm": 1.2070001363754272, "learning_rate": 0.00017641522909427214, "loss": 1.4688, "step": 1721 }, { "epoch": 0.2466695315857327, "grad_norm": 0.9402499198913574, "learning_rate": 0.00017638529264752225, "loss": 1.5808, "step": 1722 }, { "epoch": 0.24681277753903452, "grad_norm": 1.0196526050567627, "learning_rate": 0.00017635533975690905, "loss": 1.5524, "step": 1723 }, { "epoch": 0.24695602349233634, "grad_norm": 1.006087064743042, "learning_rate": 0.0001763253704288807, "loss": 1.62, "step": 1724 }, { "epoch": 0.24709926944563815, "grad_norm": 1.0902975797653198, "learning_rate": 0.00017629538466988888, "loss": 1.6567, "step": 1725 }, { "epoch": 0.24724251539894, "grad_norm": 1.2466589212417603, "learning_rate": 0.00017626538248638872, "loss": 1.5318, "step": 1726 }, { "epoch": 0.2473857613522418, "grad_norm": 1.2524625062942505, "learning_rate": 0.00017623536388483905, "loss": 1.6266, "step": 1727 }, { "epoch": 0.2475290073055436, "grad_norm": 1.467842936515808, "learning_rate": 0.000176205328871702, "loss": 1.5037, "step": 1728 }, { "epoch": 0.24767225325884543, "grad_norm": 0.9722418189048767, "learning_rate": 0.00017617527745344348, "loss": 1.6168, "step": 1729 }, { "epoch": 0.24781549921214727, "grad_norm": 1.1312811374664307, "learning_rate": 0.00017614520963653277, "loss": 1.7052, "step": 1730 }, { "epoch": 0.24795874516544908, "grad_norm": 1.1263035535812378, "learning_rate": 0.0001761151254274427, "loss": 1.5386, "step": 1731 }, { "epoch": 0.2481019911187509, "grad_norm": 1.2759530544281006, "learning_rate": 0.0001760850248326497, "loss": 1.4436, "step": 1732 }, { "epoch": 0.2482452370720527, "grad_norm": 1.198973298072815, "learning_rate": 0.00017605490785863368, "loss": 1.6428, "step": 1733 }, { "epoch": 0.24838848302535455, "grad_norm": 1.0669065713882446, "learning_rate": 0.00017602477451187802, "loss": 1.4647, "step": 1734 }, { "epoch": 0.24853172897865636, "grad_norm": 1.157007098197937, "learning_rate": 0.00017599462479886974, "loss": 1.4387, "step": 1735 }, { "epoch": 0.24867497493195817, "grad_norm": 1.0468538999557495, "learning_rate": 0.00017596445872609934, "loss": 1.4636, "step": 1736 }, { "epoch": 0.24881822088525998, "grad_norm": 0.9225525259971619, "learning_rate": 0.0001759342763000608, "loss": 1.6646, "step": 1737 }, { "epoch": 0.24896146683856182, "grad_norm": 1.3613708019256592, "learning_rate": 0.00017590407752725166, "loss": 1.5077, "step": 1738 }, { "epoch": 0.24910471279186364, "grad_norm": 1.1746326684951782, "learning_rate": 0.00017587386241417293, "loss": 1.4209, "step": 1739 }, { "epoch": 0.24924795874516545, "grad_norm": 1.1406930685043335, "learning_rate": 0.00017584363096732926, "loss": 1.4518, "step": 1740 }, { "epoch": 0.24939120469846726, "grad_norm": 1.1395173072814941, "learning_rate": 0.00017581338319322862, "loss": 1.7543, "step": 1741 }, { "epoch": 0.24953445065176907, "grad_norm": 0.9967325925827026, "learning_rate": 0.0001757831190983827, "loss": 1.5416, "step": 1742 }, { "epoch": 0.24967769660507091, "grad_norm": 1.0967161655426025, "learning_rate": 0.00017575283868930658, "loss": 1.5241, "step": 1743 }, { "epoch": 0.24982094255837273, "grad_norm": 1.072513461112976, "learning_rate": 0.0001757225419725189, "loss": 1.4348, "step": 1744 }, { "epoch": 0.24996418851167454, "grad_norm": 1.2924058437347412, "learning_rate": 0.00017569222895454174, "loss": 1.5236, "step": 1745 }, { "epoch": 0.25010743446497635, "grad_norm": 1.1272467374801636, "learning_rate": 0.0001756618996419008, "loss": 1.5072, "step": 1746 }, { "epoch": 0.2502506804182782, "grad_norm": 1.1392260789871216, "learning_rate": 0.00017563155404112522, "loss": 1.5721, "step": 1747 }, { "epoch": 0.25039392637158, "grad_norm": 1.1709660291671753, "learning_rate": 0.00017560119215874763, "loss": 1.4299, "step": 1748 }, { "epoch": 0.2505371723248818, "grad_norm": 1.3311039209365845, "learning_rate": 0.0001755708140013042, "loss": 1.5245, "step": 1749 }, { "epoch": 0.25068041827818366, "grad_norm": 1.2343685626983643, "learning_rate": 0.0001755404195753346, "loss": 1.6023, "step": 1750 }, { "epoch": 0.25082366423148544, "grad_norm": 1.0501316785812378, "learning_rate": 0.000175510008887382, "loss": 1.558, "step": 1751 }, { "epoch": 0.2509669101847873, "grad_norm": 0.9963275194168091, "learning_rate": 0.00017547958194399308, "loss": 1.5513, "step": 1752 }, { "epoch": 0.2511101561380891, "grad_norm": 1.0529346466064453, "learning_rate": 0.000175449138751718, "loss": 1.7666, "step": 1753 }, { "epoch": 0.2512534020913909, "grad_norm": 1.128468632698059, "learning_rate": 0.00017541867931711042, "loss": 1.4772, "step": 1754 }, { "epoch": 0.25139664804469275, "grad_norm": 1.3378150463104248, "learning_rate": 0.0001753882036467275, "loss": 1.4729, "step": 1755 }, { "epoch": 0.25153989399799453, "grad_norm": 0.9767134189605713, "learning_rate": 0.00017535771174712988, "loss": 1.4067, "step": 1756 }, { "epoch": 0.2516831399512964, "grad_norm": 1.0687772035598755, "learning_rate": 0.0001753272036248818, "loss": 1.5425, "step": 1757 }, { "epoch": 0.2518263859045982, "grad_norm": 1.1526464223861694, "learning_rate": 0.00017529667928655078, "loss": 1.513, "step": 1758 }, { "epoch": 0.2519696318579, "grad_norm": 1.2871372699737549, "learning_rate": 0.00017526613873870806, "loss": 1.5584, "step": 1759 }, { "epoch": 0.25211287781120184, "grad_norm": 1.1934125423431396, "learning_rate": 0.00017523558198792816, "loss": 1.4383, "step": 1760 }, { "epoch": 0.2522561237645036, "grad_norm": 1.0536959171295166, "learning_rate": 0.0001752050090407893, "loss": 1.565, "step": 1761 }, { "epoch": 0.25239936971780547, "grad_norm": 1.4676578044891357, "learning_rate": 0.000175174419903873, "loss": 1.4668, "step": 1762 }, { "epoch": 0.2525426156711073, "grad_norm": 1.408983588218689, "learning_rate": 0.00017514381458376437, "loss": 1.4313, "step": 1763 }, { "epoch": 0.2526858616244091, "grad_norm": 1.0105700492858887, "learning_rate": 0.00017511319308705198, "loss": 1.5167, "step": 1764 }, { "epoch": 0.25282910757771093, "grad_norm": 1.017472505569458, "learning_rate": 0.0001750825554203279, "loss": 1.5071, "step": 1765 }, { "epoch": 0.2529723535310128, "grad_norm": 1.105825662612915, "learning_rate": 0.00017505190159018764, "loss": 1.4043, "step": 1766 }, { "epoch": 0.25311559948431456, "grad_norm": 1.534285306930542, "learning_rate": 0.0001750212316032302, "loss": 1.4089, "step": 1767 }, { "epoch": 0.2532588454376164, "grad_norm": 1.4834009408950806, "learning_rate": 0.00017499054546605812, "loss": 1.4654, "step": 1768 }, { "epoch": 0.2534020913909182, "grad_norm": 0.9281497597694397, "learning_rate": 0.0001749598431852773, "loss": 1.5428, "step": 1769 }, { "epoch": 0.25354533734422, "grad_norm": 1.2559762001037598, "learning_rate": 0.00017492912476749722, "loss": 1.5603, "step": 1770 }, { "epoch": 0.25368858329752186, "grad_norm": 0.982282817363739, "learning_rate": 0.00017489839021933077, "loss": 1.4609, "step": 1771 }, { "epoch": 0.25383182925082365, "grad_norm": 1.3256710767745972, "learning_rate": 0.00017486763954739436, "loss": 1.4468, "step": 1772 }, { "epoch": 0.2539750752041255, "grad_norm": 0.9639436602592468, "learning_rate": 0.00017483687275830783, "loss": 1.352, "step": 1773 }, { "epoch": 0.25411832115742733, "grad_norm": 1.022750973701477, "learning_rate": 0.00017480608985869452, "loss": 1.5195, "step": 1774 }, { "epoch": 0.2542615671107291, "grad_norm": 1.0688238143920898, "learning_rate": 0.00017477529085518128, "loss": 1.4605, "step": 1775 }, { "epoch": 0.25440481306403095, "grad_norm": 1.2416025400161743, "learning_rate": 0.00017474447575439826, "loss": 1.4796, "step": 1776 }, { "epoch": 0.25454805901733274, "grad_norm": 1.5684871673583984, "learning_rate": 0.00017471364456297925, "loss": 1.5657, "step": 1777 }, { "epoch": 0.2546913049706346, "grad_norm": 1.3178629875183105, "learning_rate": 0.00017468279728756147, "loss": 1.3458, "step": 1778 }, { "epoch": 0.2548345509239364, "grad_norm": 0.9080681204795837, "learning_rate": 0.00017465193393478555, "loss": 1.5403, "step": 1779 }, { "epoch": 0.2549777968772382, "grad_norm": 1.1326262950897217, "learning_rate": 0.0001746210545112956, "loss": 1.4565, "step": 1780 }, { "epoch": 0.25512104283054005, "grad_norm": 1.1706405878067017, "learning_rate": 0.00017459015902373916, "loss": 1.3311, "step": 1781 }, { "epoch": 0.25526428878384183, "grad_norm": 1.2679508924484253, "learning_rate": 0.00017455924747876734, "loss": 1.5045, "step": 1782 }, { "epoch": 0.25540753473714367, "grad_norm": 1.0095635652542114, "learning_rate": 0.00017452831988303458, "loss": 1.5044, "step": 1783 }, { "epoch": 0.2555507806904455, "grad_norm": 1.1225614547729492, "learning_rate": 0.0001744973762431988, "loss": 1.5232, "step": 1784 }, { "epoch": 0.2556940266437473, "grad_norm": 1.2272361516952515, "learning_rate": 0.0001744664165659215, "loss": 1.3776, "step": 1785 }, { "epoch": 0.25583727259704914, "grad_norm": 1.1880406141281128, "learning_rate": 0.00017443544085786746, "loss": 1.5442, "step": 1786 }, { "epoch": 0.255980518550351, "grad_norm": 1.1158430576324463, "learning_rate": 0.000174404449125705, "loss": 1.6801, "step": 1787 }, { "epoch": 0.25612376450365276, "grad_norm": 1.192739725112915, "learning_rate": 0.00017437344137610582, "loss": 1.4992, "step": 1788 }, { "epoch": 0.2562670104569546, "grad_norm": 1.2527509927749634, "learning_rate": 0.0001743424176157452, "loss": 1.4861, "step": 1789 }, { "epoch": 0.2564102564102564, "grad_norm": 1.2842694520950317, "learning_rate": 0.00017431137785130178, "loss": 1.5096, "step": 1790 }, { "epoch": 0.25655350236355823, "grad_norm": 1.2836097478866577, "learning_rate": 0.00017428032208945758, "loss": 1.569, "step": 1791 }, { "epoch": 0.25669674831686007, "grad_norm": 1.1481493711471558, "learning_rate": 0.00017424925033689826, "loss": 1.5641, "step": 1792 }, { "epoch": 0.25683999427016185, "grad_norm": 1.244872808456421, "learning_rate": 0.00017421816260031265, "loss": 1.5275, "step": 1793 }, { "epoch": 0.2569832402234637, "grad_norm": 1.20480215549469, "learning_rate": 0.00017418705888639328, "loss": 1.3315, "step": 1794 }, { "epoch": 0.25712648617676553, "grad_norm": 1.0637485980987549, "learning_rate": 0.00017415593920183596, "loss": 1.4912, "step": 1795 }, { "epoch": 0.2572697321300673, "grad_norm": 1.3856927156448364, "learning_rate": 0.00017412480355334005, "loss": 1.5383, "step": 1796 }, { "epoch": 0.25741297808336916, "grad_norm": 1.0708727836608887, "learning_rate": 0.00017409365194760818, "loss": 1.5053, "step": 1797 }, { "epoch": 0.25755622403667094, "grad_norm": 1.5909911394119263, "learning_rate": 0.00017406248439134662, "loss": 1.4566, "step": 1798 }, { "epoch": 0.2576994699899728, "grad_norm": 1.2129840850830078, "learning_rate": 0.0001740313008912649, "loss": 1.5322, "step": 1799 }, { "epoch": 0.2578427159432746, "grad_norm": 1.1843641996383667, "learning_rate": 0.0001740001014540761, "loss": 1.4607, "step": 1800 }, { "epoch": 0.2579859618965764, "grad_norm": 1.1128901243209839, "learning_rate": 0.0001739688860864967, "loss": 1.3882, "step": 1801 }, { "epoch": 0.25812920784987825, "grad_norm": 1.0887901782989502, "learning_rate": 0.00017393765479524652, "loss": 1.5362, "step": 1802 }, { "epoch": 0.25827245380318004, "grad_norm": 1.221447229385376, "learning_rate": 0.000173906407587049, "loss": 1.7137, "step": 1803 }, { "epoch": 0.2584156997564819, "grad_norm": 1.2307121753692627, "learning_rate": 0.00017387514446863078, "loss": 1.3769, "step": 1804 }, { "epoch": 0.2585589457097837, "grad_norm": 1.1925400495529175, "learning_rate": 0.0001738438654467221, "loss": 1.7529, "step": 1805 }, { "epoch": 0.2587021916630855, "grad_norm": 1.0972455739974976, "learning_rate": 0.00017381257052805652, "loss": 1.6218, "step": 1806 }, { "epoch": 0.25884543761638734, "grad_norm": 0.8442767262458801, "learning_rate": 0.0001737812597193711, "loss": 1.5325, "step": 1807 }, { "epoch": 0.2589886835696892, "grad_norm": 1.13351571559906, "learning_rate": 0.0001737499330274063, "loss": 1.3995, "step": 1808 }, { "epoch": 0.25913192952299097, "grad_norm": 1.4445209503173828, "learning_rate": 0.0001737185904589059, "loss": 1.5527, "step": 1809 }, { "epoch": 0.2592751754762928, "grad_norm": 1.145247459411621, "learning_rate": 0.00017368723202061724, "loss": 1.4185, "step": 1810 }, { "epoch": 0.2594184214295946, "grad_norm": 1.223759412765503, "learning_rate": 0.000173655857719291, "loss": 1.5832, "step": 1811 }, { "epoch": 0.25956166738289643, "grad_norm": 1.2860922813415527, "learning_rate": 0.00017362446756168128, "loss": 1.4601, "step": 1812 }, { "epoch": 0.2597049133361983, "grad_norm": 1.2196576595306396, "learning_rate": 0.00017359306155454563, "loss": 1.5545, "step": 1813 }, { "epoch": 0.25984815928950006, "grad_norm": 1.0808385610580444, "learning_rate": 0.00017356163970464497, "loss": 1.4089, "step": 1814 }, { "epoch": 0.2599914052428019, "grad_norm": 1.0863580703735352, "learning_rate": 0.00017353020201874367, "loss": 1.5375, "step": 1815 }, { "epoch": 0.2601346511961037, "grad_norm": 1.0849562883377075, "learning_rate": 0.00017349874850360942, "loss": 1.7483, "step": 1816 }, { "epoch": 0.2602778971494055, "grad_norm": 0.9827321767807007, "learning_rate": 0.00017346727916601345, "loss": 1.6488, "step": 1817 }, { "epoch": 0.26042114310270736, "grad_norm": 1.4656785726547241, "learning_rate": 0.00017343579401273034, "loss": 1.4981, "step": 1818 }, { "epoch": 0.26056438905600915, "grad_norm": 1.0635572671890259, "learning_rate": 0.00017340429305053801, "loss": 1.6685, "step": 1819 }, { "epoch": 0.260707635009311, "grad_norm": 1.2255606651306152, "learning_rate": 0.00017337277628621785, "loss": 1.5059, "step": 1820 }, { "epoch": 0.26085088096261283, "grad_norm": 1.016801357269287, "learning_rate": 0.00017334124372655465, "loss": 1.5405, "step": 1821 }, { "epoch": 0.2609941269159146, "grad_norm": 1.137406826019287, "learning_rate": 0.00017330969537833658, "loss": 1.5363, "step": 1822 }, { "epoch": 0.26113737286921646, "grad_norm": 1.0736136436462402, "learning_rate": 0.00017327813124835525, "loss": 1.3897, "step": 1823 }, { "epoch": 0.26128061882251824, "grad_norm": 1.13479745388031, "learning_rate": 0.0001732465513434056, "loss": 1.5586, "step": 1824 }, { "epoch": 0.2614238647758201, "grad_norm": 1.194003701210022, "learning_rate": 0.00017321495567028606, "loss": 1.3579, "step": 1825 }, { "epoch": 0.2615671107291219, "grad_norm": 1.1439099311828613, "learning_rate": 0.0001731833442357983, "loss": 1.4746, "step": 1826 }, { "epoch": 0.2617103566824237, "grad_norm": 1.3568291664123535, "learning_rate": 0.00017315171704674755, "loss": 1.3914, "step": 1827 }, { "epoch": 0.26185360263572555, "grad_norm": 1.0243639945983887, "learning_rate": 0.00017312007410994235, "loss": 1.699, "step": 1828 }, { "epoch": 0.2619968485890274, "grad_norm": 1.2641977071762085, "learning_rate": 0.0001730884154321946, "loss": 1.5485, "step": 1829 }, { "epoch": 0.26214009454232917, "grad_norm": 1.1948013305664062, "learning_rate": 0.0001730567410203197, "loss": 1.4031, "step": 1830 }, { "epoch": 0.262283340495631, "grad_norm": 1.1585381031036377, "learning_rate": 0.00017302505088113634, "loss": 1.5109, "step": 1831 }, { "epoch": 0.2624265864489328, "grad_norm": 1.291182279586792, "learning_rate": 0.0001729933450214666, "loss": 1.5202, "step": 1832 }, { "epoch": 0.26256983240223464, "grad_norm": 1.3056670427322388, "learning_rate": 0.00017296162344813598, "loss": 1.6215, "step": 1833 }, { "epoch": 0.2627130783555365, "grad_norm": 1.1388869285583496, "learning_rate": 0.00017292988616797336, "loss": 1.5668, "step": 1834 }, { "epoch": 0.26285632430883826, "grad_norm": 0.9789540767669678, "learning_rate": 0.00017289813318781098, "loss": 1.5762, "step": 1835 }, { "epoch": 0.2629995702621401, "grad_norm": 1.2062697410583496, "learning_rate": 0.00017286636451448447, "loss": 1.3662, "step": 1836 }, { "epoch": 0.2631428162154419, "grad_norm": 1.0115976333618164, "learning_rate": 0.00017283458015483283, "loss": 1.7368, "step": 1837 }, { "epoch": 0.26328606216874373, "grad_norm": 1.151848316192627, "learning_rate": 0.00017280278011569847, "loss": 1.5255, "step": 1838 }, { "epoch": 0.26342930812204557, "grad_norm": 1.246985673904419, "learning_rate": 0.00017277096440392717, "loss": 1.377, "step": 1839 }, { "epoch": 0.26357255407534735, "grad_norm": 1.1437475681304932, "learning_rate": 0.00017273913302636798, "loss": 1.5936, "step": 1840 }, { "epoch": 0.2637158000286492, "grad_norm": 1.1832574605941772, "learning_rate": 0.00017270728598987352, "loss": 1.5206, "step": 1841 }, { "epoch": 0.26385904598195103, "grad_norm": 1.4420394897460938, "learning_rate": 0.00017267542330129957, "loss": 1.4458, "step": 1842 }, { "epoch": 0.2640022919352528, "grad_norm": 1.001261830329895, "learning_rate": 0.00017264354496750544, "loss": 1.4021, "step": 1843 }, { "epoch": 0.26414553788855466, "grad_norm": 1.3838437795639038, "learning_rate": 0.0001726116509953537, "loss": 1.6433, "step": 1844 }, { "epoch": 0.26428878384185645, "grad_norm": 1.4447978734970093, "learning_rate": 0.0001725797413917104, "loss": 1.2929, "step": 1845 }, { "epoch": 0.2644320297951583, "grad_norm": 1.1281362771987915, "learning_rate": 0.00017254781616344485, "loss": 1.4892, "step": 1846 }, { "epoch": 0.2645752757484601, "grad_norm": 1.043124794960022, "learning_rate": 0.00017251587531742971, "loss": 1.6243, "step": 1847 }, { "epoch": 0.2647185217017619, "grad_norm": 1.2842302322387695, "learning_rate": 0.00017248391886054114, "loss": 1.5771, "step": 1848 }, { "epoch": 0.26486176765506375, "grad_norm": 1.451529622077942, "learning_rate": 0.00017245194679965854, "loss": 1.4743, "step": 1849 }, { "epoch": 0.26500501360836554, "grad_norm": 1.3679934740066528, "learning_rate": 0.00017241995914166468, "loss": 1.4831, "step": 1850 }, { "epoch": 0.2651482595616674, "grad_norm": 0.9345340132713318, "learning_rate": 0.00017238795589344575, "loss": 1.7174, "step": 1851 }, { "epoch": 0.2652915055149692, "grad_norm": 1.5027233362197876, "learning_rate": 0.00017235593706189123, "loss": 1.3468, "step": 1852 }, { "epoch": 0.265434751468271, "grad_norm": 0.987837553024292, "learning_rate": 0.000172323902653894, "loss": 1.6088, "step": 1853 }, { "epoch": 0.26557799742157284, "grad_norm": 1.037922978401184, "learning_rate": 0.00017229185267635027, "loss": 1.5212, "step": 1854 }, { "epoch": 0.2657212433748747, "grad_norm": 1.0385154485702515, "learning_rate": 0.00017225978713615958, "loss": 1.5651, "step": 1855 }, { "epoch": 0.26586448932817647, "grad_norm": 1.1097460985183716, "learning_rate": 0.0001722277060402249, "loss": 1.6475, "step": 1856 }, { "epoch": 0.2660077352814783, "grad_norm": 1.4183669090270996, "learning_rate": 0.00017219560939545246, "loss": 1.4513, "step": 1857 }, { "epoch": 0.2661509812347801, "grad_norm": 1.0849918127059937, "learning_rate": 0.0001721634972087519, "loss": 1.5405, "step": 1858 }, { "epoch": 0.26629422718808193, "grad_norm": 0.98914635181427, "learning_rate": 0.0001721313694870361, "loss": 1.556, "step": 1859 }, { "epoch": 0.2664374731413838, "grad_norm": 1.02264404296875, "learning_rate": 0.00017209922623722147, "loss": 1.4175, "step": 1860 }, { "epoch": 0.26658071909468556, "grad_norm": 1.0332555770874023, "learning_rate": 0.0001720670674662276, "loss": 1.5259, "step": 1861 }, { "epoch": 0.2667239650479874, "grad_norm": 1.1522632837295532, "learning_rate": 0.00017203489318097753, "loss": 1.54, "step": 1862 }, { "epoch": 0.26686721100128924, "grad_norm": 1.1492314338684082, "learning_rate": 0.00017200270338839748, "loss": 1.5314, "step": 1863 }, { "epoch": 0.267010456954591, "grad_norm": 1.0583293437957764, "learning_rate": 0.00017197049809541717, "loss": 1.4963, "step": 1864 }, { "epoch": 0.26715370290789286, "grad_norm": 1.292770266532898, "learning_rate": 0.00017193827730896968, "loss": 1.567, "step": 1865 }, { "epoch": 0.26729694886119465, "grad_norm": 1.2695982456207275, "learning_rate": 0.00017190604103599127, "loss": 1.4201, "step": 1866 }, { "epoch": 0.2674401948144965, "grad_norm": 1.3297650814056396, "learning_rate": 0.0001718737892834216, "loss": 1.4772, "step": 1867 }, { "epoch": 0.26758344076779833, "grad_norm": 1.0892633199691772, "learning_rate": 0.00017184152205820368, "loss": 1.5657, "step": 1868 }, { "epoch": 0.2677266867211001, "grad_norm": 1.2263191938400269, "learning_rate": 0.00017180923936728387, "loss": 1.4843, "step": 1869 }, { "epoch": 0.26786993267440196, "grad_norm": 1.264400839805603, "learning_rate": 0.00017177694121761188, "loss": 1.5874, "step": 1870 }, { "epoch": 0.26801317862770374, "grad_norm": 1.2720884084701538, "learning_rate": 0.00017174462761614058, "loss": 1.4386, "step": 1871 }, { "epoch": 0.2681564245810056, "grad_norm": 1.2213882207870483, "learning_rate": 0.00017171229856982639, "loss": 1.3395, "step": 1872 }, { "epoch": 0.2682996705343074, "grad_norm": 1.2335898876190186, "learning_rate": 0.0001716799540856289, "loss": 1.2524, "step": 1873 }, { "epoch": 0.2684429164876092, "grad_norm": 1.3253637552261353, "learning_rate": 0.00017164759417051114, "loss": 1.4424, "step": 1874 }, { "epoch": 0.26858616244091105, "grad_norm": 1.155497431755066, "learning_rate": 0.00017161521883143934, "loss": 1.5293, "step": 1875 }, { "epoch": 0.2687294083942129, "grad_norm": 1.0171724557876587, "learning_rate": 0.00017158282807538312, "loss": 1.5982, "step": 1876 }, { "epoch": 0.2688726543475147, "grad_norm": 1.1201980113983154, "learning_rate": 0.00017155042190931542, "loss": 1.4978, "step": 1877 }, { "epoch": 0.2690159003008165, "grad_norm": 1.346705436706543, "learning_rate": 0.00017151800034021248, "loss": 1.6491, "step": 1878 }, { "epoch": 0.2691591462541183, "grad_norm": 1.1991596221923828, "learning_rate": 0.00017148556337505386, "loss": 1.4552, "step": 1879 }, { "epoch": 0.26930239220742014, "grad_norm": 1.1959863901138306, "learning_rate": 0.00017145311102082243, "loss": 1.4665, "step": 1880 }, { "epoch": 0.269445638160722, "grad_norm": 1.2051140069961548, "learning_rate": 0.00017142064328450442, "loss": 1.564, "step": 1881 }, { "epoch": 0.26958888411402376, "grad_norm": 1.2838658094406128, "learning_rate": 0.00017138816017308925, "loss": 1.392, "step": 1882 }, { "epoch": 0.2697321300673256, "grad_norm": 1.3614393472671509, "learning_rate": 0.00017135566169356983, "loss": 1.4089, "step": 1883 }, { "epoch": 0.26987537602062744, "grad_norm": 1.3757820129394531, "learning_rate": 0.00017132314785294218, "loss": 1.3859, "step": 1884 }, { "epoch": 0.27001862197392923, "grad_norm": 1.2666202783584595, "learning_rate": 0.00017129061865820582, "loss": 1.6483, "step": 1885 }, { "epoch": 0.27016186792723107, "grad_norm": 1.3503568172454834, "learning_rate": 0.0001712580741163634, "loss": 1.4915, "step": 1886 }, { "epoch": 0.27030511388053285, "grad_norm": 1.2183852195739746, "learning_rate": 0.00017122551423442097, "loss": 1.5049, "step": 1887 }, { "epoch": 0.2704483598338347, "grad_norm": 0.9913017749786377, "learning_rate": 0.0001711929390193879, "loss": 1.2689, "step": 1888 }, { "epoch": 0.27059160578713654, "grad_norm": 1.223230004310608, "learning_rate": 0.0001711603484782768, "loss": 1.3459, "step": 1889 }, { "epoch": 0.2707348517404383, "grad_norm": 1.1682639122009277, "learning_rate": 0.00017112774261810365, "loss": 1.4451, "step": 1890 }, { "epoch": 0.27087809769374016, "grad_norm": 1.2320588827133179, "learning_rate": 0.00017109512144588762, "loss": 1.471, "step": 1891 }, { "epoch": 0.27102134364704195, "grad_norm": 1.2000607252120972, "learning_rate": 0.0001710624849686513, "loss": 1.4219, "step": 1892 }, { "epoch": 0.2711645896003438, "grad_norm": 1.3722758293151855, "learning_rate": 0.00017102983319342046, "loss": 1.5727, "step": 1893 }, { "epoch": 0.2713078355536456, "grad_norm": 1.1525709629058838, "learning_rate": 0.00017099716612722427, "loss": 1.6695, "step": 1894 }, { "epoch": 0.2714510815069474, "grad_norm": 1.111646056175232, "learning_rate": 0.00017096448377709514, "loss": 1.5913, "step": 1895 }, { "epoch": 0.27159432746024925, "grad_norm": 1.3374725580215454, "learning_rate": 0.00017093178615006872, "loss": 1.4504, "step": 1896 }, { "epoch": 0.2717375734135511, "grad_norm": 1.209067702293396, "learning_rate": 0.00017089907325318403, "loss": 1.4779, "step": 1897 }, { "epoch": 0.2718808193668529, "grad_norm": 1.0878421068191528, "learning_rate": 0.00017086634509348337, "loss": 1.3987, "step": 1898 }, { "epoch": 0.2720240653201547, "grad_norm": 1.3177785873413086, "learning_rate": 0.00017083360167801225, "loss": 1.6204, "step": 1899 }, { "epoch": 0.2721673112734565, "grad_norm": 1.2348480224609375, "learning_rate": 0.00017080084301381956, "loss": 1.4488, "step": 1900 }, { "epoch": 0.27231055722675834, "grad_norm": 1.1994290351867676, "learning_rate": 0.0001707680691079574, "loss": 1.4117, "step": 1901 }, { "epoch": 0.2724538031800602, "grad_norm": 1.2006165981292725, "learning_rate": 0.00017073527996748123, "loss": 1.442, "step": 1902 }, { "epoch": 0.27259704913336197, "grad_norm": 1.205885887145996, "learning_rate": 0.0001707024755994497, "loss": 1.5712, "step": 1903 }, { "epoch": 0.2727402950866638, "grad_norm": 1.3983968496322632, "learning_rate": 0.00017066965601092478, "loss": 1.4837, "step": 1904 }, { "epoch": 0.2728835410399656, "grad_norm": 1.0385433435440063, "learning_rate": 0.00017063682120897175, "loss": 1.6284, "step": 1905 }, { "epoch": 0.27302678699326743, "grad_norm": 1.2428748607635498, "learning_rate": 0.00017060397120065909, "loss": 1.5576, "step": 1906 }, { "epoch": 0.2731700329465693, "grad_norm": 1.2136071920394897, "learning_rate": 0.0001705711059930586, "loss": 1.495, "step": 1907 }, { "epoch": 0.27331327889987106, "grad_norm": 1.1357100009918213, "learning_rate": 0.00017053822559324536, "loss": 1.5327, "step": 1908 }, { "epoch": 0.2734565248531729, "grad_norm": 1.517285943031311, "learning_rate": 0.0001705053300082977, "loss": 1.4914, "step": 1909 }, { "epoch": 0.27359977080647474, "grad_norm": 1.0948734283447266, "learning_rate": 0.00017047241924529721, "loss": 1.559, "step": 1910 }, { "epoch": 0.2737430167597765, "grad_norm": 1.1471765041351318, "learning_rate": 0.00017043949331132878, "loss": 1.4555, "step": 1911 }, { "epoch": 0.27388626271307837, "grad_norm": 1.1361476182937622, "learning_rate": 0.00017040655221348057, "loss": 1.5873, "step": 1912 }, { "epoch": 0.27402950866638015, "grad_norm": 1.1316759586334229, "learning_rate": 0.00017037359595884394, "loss": 1.4994, "step": 1913 }, { "epoch": 0.274172754619682, "grad_norm": 1.2050585746765137, "learning_rate": 0.0001703406245545136, "loss": 1.6988, "step": 1914 }, { "epoch": 0.27431600057298383, "grad_norm": 1.2752808332443237, "learning_rate": 0.00017030763800758743, "loss": 1.4632, "step": 1915 }, { "epoch": 0.2744592465262856, "grad_norm": 1.11107337474823, "learning_rate": 0.00017027463632516665, "loss": 1.5772, "step": 1916 }, { "epoch": 0.27460249247958746, "grad_norm": 1.1216106414794922, "learning_rate": 0.00017024161951435568, "loss": 1.2876, "step": 1917 }, { "epoch": 0.2747457384328893, "grad_norm": 1.3822041749954224, "learning_rate": 0.00017020858758226229, "loss": 1.5395, "step": 1918 }, { "epoch": 0.2748889843861911, "grad_norm": 0.9759470224380493, "learning_rate": 0.0001701755405359973, "loss": 1.5713, "step": 1919 }, { "epoch": 0.2750322303394929, "grad_norm": 1.0533623695373535, "learning_rate": 0.00017014247838267508, "loss": 1.6032, "step": 1920 }, { "epoch": 0.2751754762927947, "grad_norm": 1.5107101202011108, "learning_rate": 0.000170109401129413, "loss": 1.4686, "step": 1921 }, { "epoch": 0.27531872224609655, "grad_norm": 1.0671602487564087, "learning_rate": 0.0001700763087833318, "loss": 1.5392, "step": 1922 }, { "epoch": 0.2754619681993984, "grad_norm": 1.1020092964172363, "learning_rate": 0.00017004320135155542, "loss": 1.6509, "step": 1923 }, { "epoch": 0.2756052141527002, "grad_norm": 1.03842294216156, "learning_rate": 0.0001700100788412111, "loss": 1.5069, "step": 1924 }, { "epoch": 0.275748460106002, "grad_norm": 1.1630704402923584, "learning_rate": 0.0001699769412594293, "loss": 1.3128, "step": 1925 }, { "epoch": 0.2758917060593038, "grad_norm": 1.168697476387024, "learning_rate": 0.0001699437886133437, "loss": 1.3766, "step": 1926 }, { "epoch": 0.27603495201260564, "grad_norm": 1.1691465377807617, "learning_rate": 0.00016991062091009122, "loss": 1.4515, "step": 1927 }, { "epoch": 0.2761781979659075, "grad_norm": 1.1161352396011353, "learning_rate": 0.0001698774381568121, "loss": 1.4423, "step": 1928 }, { "epoch": 0.27632144391920926, "grad_norm": 1.354994535446167, "learning_rate": 0.00016984424036064975, "loss": 1.5517, "step": 1929 }, { "epoch": 0.2764646898725111, "grad_norm": 1.1025282144546509, "learning_rate": 0.0001698110275287508, "loss": 1.6438, "step": 1930 }, { "epoch": 0.27660793582581295, "grad_norm": 1.0699341297149658, "learning_rate": 0.00016977779966826522, "loss": 1.3712, "step": 1931 }, { "epoch": 0.27675118177911473, "grad_norm": 1.0224467515945435, "learning_rate": 0.00016974455678634608, "loss": 1.5837, "step": 1932 }, { "epoch": 0.27689442773241657, "grad_norm": 1.265955924987793, "learning_rate": 0.0001697112988901498, "loss": 1.5958, "step": 1933 }, { "epoch": 0.27703767368571836, "grad_norm": 0.9183480143547058, "learning_rate": 0.00016967802598683593, "loss": 1.4942, "step": 1934 }, { "epoch": 0.2771809196390202, "grad_norm": 1.1272201538085938, "learning_rate": 0.00016964473808356735, "loss": 1.6687, "step": 1935 }, { "epoch": 0.27732416559232204, "grad_norm": 0.9888427257537842, "learning_rate": 0.00016961143518751004, "loss": 1.6476, "step": 1936 }, { "epoch": 0.2774674115456238, "grad_norm": 1.028112530708313, "learning_rate": 0.0001695781173058334, "loss": 1.4354, "step": 1937 }, { "epoch": 0.27761065749892566, "grad_norm": 1.298352837562561, "learning_rate": 0.0001695447844457099, "loss": 1.4704, "step": 1938 }, { "epoch": 0.2777539034522275, "grad_norm": 1.4349368810653687, "learning_rate": 0.00016951143661431524, "loss": 1.3916, "step": 1939 }, { "epoch": 0.2778971494055293, "grad_norm": 1.2475242614746094, "learning_rate": 0.00016947807381882844, "loss": 1.4985, "step": 1940 }, { "epoch": 0.2780403953588311, "grad_norm": 1.2409940958023071, "learning_rate": 0.00016944469606643167, "loss": 1.5647, "step": 1941 }, { "epoch": 0.2781836413121329, "grad_norm": 1.1128641366958618, "learning_rate": 0.00016941130336431032, "loss": 1.5453, "step": 1942 }, { "epoch": 0.27832688726543475, "grad_norm": 1.2181445360183716, "learning_rate": 0.00016937789571965303, "loss": 1.4346, "step": 1943 }, { "epoch": 0.2784701332187366, "grad_norm": 1.1452646255493164, "learning_rate": 0.0001693444731396516, "loss": 1.5195, "step": 1944 }, { "epoch": 0.2786133791720384, "grad_norm": 1.1039472818374634, "learning_rate": 0.00016931103563150112, "loss": 1.5302, "step": 1945 }, { "epoch": 0.2787566251253402, "grad_norm": 1.3316044807434082, "learning_rate": 0.00016927758320239988, "loss": 1.2911, "step": 1946 }, { "epoch": 0.278899871078642, "grad_norm": 1.1934200525283813, "learning_rate": 0.00016924411585954928, "loss": 1.4202, "step": 1947 }, { "epoch": 0.27904311703194384, "grad_norm": 1.0135799646377563, "learning_rate": 0.00016921063361015413, "loss": 1.5612, "step": 1948 }, { "epoch": 0.2791863629852457, "grad_norm": 0.9171785116195679, "learning_rate": 0.00016917713646142222, "loss": 1.5431, "step": 1949 }, { "epoch": 0.27932960893854747, "grad_norm": 1.1183679103851318, "learning_rate": 0.0001691436244205647, "loss": 1.5461, "step": 1950 }, { "epoch": 0.2794728548918493, "grad_norm": 1.3267743587493896, "learning_rate": 0.00016911009749479586, "loss": 1.5698, "step": 1951 }, { "epoch": 0.27961610084515115, "grad_norm": 1.3369734287261963, "learning_rate": 0.00016907655569133327, "loss": 1.437, "step": 1952 }, { "epoch": 0.27975934679845293, "grad_norm": 1.0967153310775757, "learning_rate": 0.0001690429990173976, "loss": 1.587, "step": 1953 }, { "epoch": 0.2799025927517548, "grad_norm": 1.1468194723129272, "learning_rate": 0.0001690094274802128, "loss": 1.5188, "step": 1954 }, { "epoch": 0.28004583870505656, "grad_norm": 1.0509079694747925, "learning_rate": 0.00016897584108700598, "loss": 1.6605, "step": 1955 }, { "epoch": 0.2801890846583584, "grad_norm": 1.168953776359558, "learning_rate": 0.00016894223984500746, "loss": 1.5138, "step": 1956 }, { "epoch": 0.28033233061166024, "grad_norm": 0.9107540249824524, "learning_rate": 0.00016890862376145075, "loss": 1.5842, "step": 1957 }, { "epoch": 0.280475576564962, "grad_norm": 0.8969243168830872, "learning_rate": 0.00016887499284357258, "loss": 1.5129, "step": 1958 }, { "epoch": 0.28061882251826387, "grad_norm": 1.2624499797821045, "learning_rate": 0.00016884134709861285, "loss": 1.4811, "step": 1959 }, { "epoch": 0.28076206847156565, "grad_norm": 1.0120142698287964, "learning_rate": 0.00016880768653381462, "loss": 1.4556, "step": 1960 }, { "epoch": 0.2809053144248675, "grad_norm": 1.2255308628082275, "learning_rate": 0.0001687740111564242, "loss": 1.4382, "step": 1961 }, { "epoch": 0.28104856037816933, "grad_norm": 1.2514064311981201, "learning_rate": 0.00016874032097369113, "loss": 1.4465, "step": 1962 }, { "epoch": 0.2811918063314711, "grad_norm": 1.1582660675048828, "learning_rate": 0.00016870661599286798, "loss": 1.3946, "step": 1963 }, { "epoch": 0.28133505228477296, "grad_norm": 1.0066133737564087, "learning_rate": 0.0001686728962212106, "loss": 1.5363, "step": 1964 }, { "epoch": 0.2814782982380748, "grad_norm": 1.346342921257019, "learning_rate": 0.0001686391616659781, "loss": 1.5577, "step": 1965 }, { "epoch": 0.2816215441913766, "grad_norm": 0.9517983198165894, "learning_rate": 0.00016860541233443263, "loss": 1.4439, "step": 1966 }, { "epoch": 0.2817647901446784, "grad_norm": 1.2255555391311646, "learning_rate": 0.0001685716482338396, "loss": 1.5787, "step": 1967 }, { "epoch": 0.2819080360979802, "grad_norm": 1.2086429595947266, "learning_rate": 0.0001685378693714676, "loss": 1.6298, "step": 1968 }, { "epoch": 0.28205128205128205, "grad_norm": 0.9574936628341675, "learning_rate": 0.00016850407575458838, "loss": 1.5551, "step": 1969 }, { "epoch": 0.2821945280045839, "grad_norm": 1.1391602754592896, "learning_rate": 0.00016847026739047683, "loss": 1.4595, "step": 1970 }, { "epoch": 0.2823377739578857, "grad_norm": 1.208785891532898, "learning_rate": 0.00016843644428641113, "loss": 1.4375, "step": 1971 }, { "epoch": 0.2824810199111875, "grad_norm": 1.231567621231079, "learning_rate": 0.00016840260644967247, "loss": 1.542, "step": 1972 }, { "epoch": 0.28262426586448935, "grad_norm": 0.9703326225280762, "learning_rate": 0.00016836875388754537, "loss": 1.4323, "step": 1973 }, { "epoch": 0.28276751181779114, "grad_norm": 1.1931341886520386, "learning_rate": 0.00016833488660731742, "loss": 1.4877, "step": 1974 }, { "epoch": 0.282910757771093, "grad_norm": 1.4200971126556396, "learning_rate": 0.0001683010046162794, "loss": 1.5347, "step": 1975 }, { "epoch": 0.28305400372439476, "grad_norm": 1.2097086906433105, "learning_rate": 0.0001682671079217253, "loss": 1.3702, "step": 1976 }, { "epoch": 0.2831972496776966, "grad_norm": 1.409982442855835, "learning_rate": 0.00016823319653095218, "loss": 1.4491, "step": 1977 }, { "epoch": 0.28334049563099845, "grad_norm": 1.1274410486221313, "learning_rate": 0.00016819927045126035, "loss": 1.4181, "step": 1978 }, { "epoch": 0.28348374158430023, "grad_norm": 1.1649516820907593, "learning_rate": 0.00016816532968995328, "loss": 1.4905, "step": 1979 }, { "epoch": 0.28362698753760207, "grad_norm": 1.1671640872955322, "learning_rate": 0.00016813137425433758, "loss": 1.471, "step": 1980 }, { "epoch": 0.28377023349090386, "grad_norm": 1.5519614219665527, "learning_rate": 0.00016809740415172297, "loss": 1.5533, "step": 1981 }, { "epoch": 0.2839134794442057, "grad_norm": 1.2579014301300049, "learning_rate": 0.0001680634193894224, "loss": 1.3766, "step": 1982 }, { "epoch": 0.28405672539750754, "grad_norm": 1.148878812789917, "learning_rate": 0.00016802941997475196, "loss": 1.3156, "step": 1983 }, { "epoch": 0.2841999713508093, "grad_norm": 1.2862945795059204, "learning_rate": 0.00016799540591503083, "loss": 1.6057, "step": 1984 }, { "epoch": 0.28434321730411116, "grad_norm": 1.0098503828048706, "learning_rate": 0.00016796137721758142, "loss": 1.5406, "step": 1985 }, { "epoch": 0.284486463257413, "grad_norm": 1.2319185733795166, "learning_rate": 0.00016792733388972932, "loss": 1.5553, "step": 1986 }, { "epoch": 0.2846297092107148, "grad_norm": 1.1229959726333618, "learning_rate": 0.00016789327593880318, "loss": 1.511, "step": 1987 }, { "epoch": 0.28477295516401663, "grad_norm": 1.0639524459838867, "learning_rate": 0.0001678592033721348, "loss": 1.4794, "step": 1988 }, { "epoch": 0.2849162011173184, "grad_norm": 1.2658641338348389, "learning_rate": 0.0001678251161970592, "loss": 1.6231, "step": 1989 }, { "epoch": 0.28505944707062025, "grad_norm": 1.185575246810913, "learning_rate": 0.00016779101442091447, "loss": 1.5016, "step": 1990 }, { "epoch": 0.2852026930239221, "grad_norm": 1.1228439807891846, "learning_rate": 0.00016775689805104192, "loss": 1.5584, "step": 1991 }, { "epoch": 0.2853459389772239, "grad_norm": 1.2919049263000488, "learning_rate": 0.00016772276709478597, "loss": 1.3909, "step": 1992 }, { "epoch": 0.2854891849305257, "grad_norm": 0.9523124694824219, "learning_rate": 0.00016768862155949413, "loss": 1.3926, "step": 1993 }, { "epoch": 0.28563243088382756, "grad_norm": 1.056960940361023, "learning_rate": 0.00016765446145251706, "loss": 1.6344, "step": 1994 }, { "epoch": 0.28577567683712934, "grad_norm": 1.2078131437301636, "learning_rate": 0.00016762028678120867, "loss": 1.6735, "step": 1995 }, { "epoch": 0.2859189227904312, "grad_norm": 1.0921590328216553, "learning_rate": 0.00016758609755292584, "loss": 1.729, "step": 1996 }, { "epoch": 0.28606216874373297, "grad_norm": 1.2056080102920532, "learning_rate": 0.00016755189377502876, "loss": 1.393, "step": 1997 }, { "epoch": 0.2862054146970348, "grad_norm": 1.1325101852416992, "learning_rate": 0.00016751767545488056, "loss": 1.4909, "step": 1998 }, { "epoch": 0.28634866065033665, "grad_norm": 1.0890710353851318, "learning_rate": 0.00016748344259984762, "loss": 1.5392, "step": 1999 }, { "epoch": 0.28649190660363844, "grad_norm": 1.3951345682144165, "learning_rate": 0.00016744919521729948, "loss": 1.4531, "step": 2000 }, { "epoch": 0.2866351525569403, "grad_norm": 1.0557584762573242, "learning_rate": 0.0001674149333146087, "loss": 1.4296, "step": 2001 }, { "epoch": 0.28677839851024206, "grad_norm": 1.0310229063034058, "learning_rate": 0.00016738065689915106, "loss": 1.3732, "step": 2002 }, { "epoch": 0.2869216444635439, "grad_norm": 1.0658046007156372, "learning_rate": 0.0001673463659783054, "loss": 1.3561, "step": 2003 }, { "epoch": 0.28706489041684574, "grad_norm": 1.177260160446167, "learning_rate": 0.00016731206055945366, "loss": 1.3102, "step": 2004 }, { "epoch": 0.2872081363701475, "grad_norm": 0.9633142948150635, "learning_rate": 0.00016727774064998106, "loss": 1.5386, "step": 2005 }, { "epoch": 0.28735138232344937, "grad_norm": 1.1098424196243286, "learning_rate": 0.00016724340625727573, "loss": 1.4217, "step": 2006 }, { "epoch": 0.2874946282767512, "grad_norm": 0.9937300086021423, "learning_rate": 0.00016720905738872905, "loss": 1.3578, "step": 2007 }, { "epoch": 0.287637874230053, "grad_norm": 1.161625862121582, "learning_rate": 0.00016717469405173549, "loss": 1.3959, "step": 2008 }, { "epoch": 0.28778112018335483, "grad_norm": 0.9400433897972107, "learning_rate": 0.00016714031625369264, "loss": 1.6437, "step": 2009 }, { "epoch": 0.2879243661366566, "grad_norm": 1.4957982301712036, "learning_rate": 0.0001671059240020011, "loss": 1.3158, "step": 2010 }, { "epoch": 0.28806761208995846, "grad_norm": 1.1123487949371338, "learning_rate": 0.00016707151730406482, "loss": 1.717, "step": 2011 }, { "epoch": 0.2882108580432603, "grad_norm": 1.150346040725708, "learning_rate": 0.00016703709616729055, "loss": 1.4951, "step": 2012 }, { "epoch": 0.2883541039965621, "grad_norm": 1.0847374200820923, "learning_rate": 0.00016700266059908842, "loss": 1.4901, "step": 2013 }, { "epoch": 0.2884973499498639, "grad_norm": 0.90064936876297, "learning_rate": 0.00016696821060687155, "loss": 1.5383, "step": 2014 }, { "epoch": 0.2886405959031657, "grad_norm": 1.0576918125152588, "learning_rate": 0.00016693374619805606, "loss": 1.4958, "step": 2015 }, { "epoch": 0.28878384185646755, "grad_norm": 1.3058196306228638, "learning_rate": 0.00016689926738006143, "loss": 1.5537, "step": 2016 }, { "epoch": 0.2889270878097694, "grad_norm": 1.2564175128936768, "learning_rate": 0.00016686477416031004, "loss": 1.5773, "step": 2017 }, { "epoch": 0.2890703337630712, "grad_norm": 1.1657989025115967, "learning_rate": 0.0001668302665462274, "loss": 1.5286, "step": 2018 }, { "epoch": 0.289213579716373, "grad_norm": 1.1100525856018066, "learning_rate": 0.00016679574454524213, "loss": 1.5379, "step": 2019 }, { "epoch": 0.28935682566967486, "grad_norm": 1.007617473602295, "learning_rate": 0.00016676120816478605, "loss": 1.5477, "step": 2020 }, { "epoch": 0.28950007162297664, "grad_norm": 0.9686825275421143, "learning_rate": 0.00016672665741229392, "loss": 1.6948, "step": 2021 }, { "epoch": 0.2896433175762785, "grad_norm": 1.1076608896255493, "learning_rate": 0.00016669209229520367, "loss": 1.4572, "step": 2022 }, { "epoch": 0.28978656352958027, "grad_norm": 0.9844187498092651, "learning_rate": 0.00016665751282095634, "loss": 1.3964, "step": 2023 }, { "epoch": 0.2899298094828821, "grad_norm": 1.141664981842041, "learning_rate": 0.000166622918996996, "loss": 1.5967, "step": 2024 }, { "epoch": 0.29007305543618395, "grad_norm": 1.1901179552078247, "learning_rate": 0.00016658831083076985, "loss": 1.5506, "step": 2025 }, { "epoch": 0.29021630138948573, "grad_norm": 0.9641210436820984, "learning_rate": 0.0001665536883297282, "loss": 1.5155, "step": 2026 }, { "epoch": 0.29035954734278757, "grad_norm": 1.2455899715423584, "learning_rate": 0.0001665190515013244, "loss": 1.3626, "step": 2027 }, { "epoch": 0.2905027932960894, "grad_norm": 1.0462678670883179, "learning_rate": 0.0001664844003530149, "loss": 1.55, "step": 2028 }, { "epoch": 0.2906460392493912, "grad_norm": 1.0634565353393555, "learning_rate": 0.00016644973489225926, "loss": 1.5275, "step": 2029 }, { "epoch": 0.29078928520269304, "grad_norm": 0.9982190728187561, "learning_rate": 0.00016641505512652005, "loss": 1.5624, "step": 2030 }, { "epoch": 0.2909325311559948, "grad_norm": 1.0724668502807617, "learning_rate": 0.00016638036106326296, "loss": 1.2859, "step": 2031 }, { "epoch": 0.29107577710929666, "grad_norm": 1.1475642919540405, "learning_rate": 0.00016634565270995684, "loss": 1.3716, "step": 2032 }, { "epoch": 0.2912190230625985, "grad_norm": 1.275540828704834, "learning_rate": 0.00016631093007407347, "loss": 1.4838, "step": 2033 }, { "epoch": 0.2913622690159003, "grad_norm": 1.113063931465149, "learning_rate": 0.0001662761931630878, "loss": 1.463, "step": 2034 }, { "epoch": 0.29150551496920213, "grad_norm": 0.8122738599777222, "learning_rate": 0.00016624144198447782, "loss": 1.5566, "step": 2035 }, { "epoch": 0.2916487609225039, "grad_norm": 1.1239324808120728, "learning_rate": 0.0001662066765457246, "loss": 1.3179, "step": 2036 }, { "epoch": 0.29179200687580575, "grad_norm": 1.2529375553131104, "learning_rate": 0.00016617189685431228, "loss": 1.5301, "step": 2037 }, { "epoch": 0.2919352528291076, "grad_norm": 1.1471854448318481, "learning_rate": 0.00016613710291772812, "loss": 1.4626, "step": 2038 }, { "epoch": 0.2920784987824094, "grad_norm": 1.1331422328948975, "learning_rate": 0.00016610229474346228, "loss": 1.4181, "step": 2039 }, { "epoch": 0.2922217447357112, "grad_norm": 1.0694282054901123, "learning_rate": 0.00016606747233900815, "loss": 1.5129, "step": 2040 }, { "epoch": 0.29236499068901306, "grad_norm": 1.1972836256027222, "learning_rate": 0.00016603263571186218, "loss": 1.5453, "step": 2041 }, { "epoch": 0.29250823664231484, "grad_norm": 1.229253888130188, "learning_rate": 0.0001659977848695238, "loss": 1.4401, "step": 2042 }, { "epoch": 0.2926514825956167, "grad_norm": 1.1006466150283813, "learning_rate": 0.0001659629198194955, "loss": 1.4402, "step": 2043 }, { "epoch": 0.29279472854891847, "grad_norm": 1.2078962326049805, "learning_rate": 0.00016592804056928285, "loss": 1.1869, "step": 2044 }, { "epoch": 0.2929379745022203, "grad_norm": 1.0909185409545898, "learning_rate": 0.0001658931471263946, "loss": 1.5252, "step": 2045 }, { "epoch": 0.29308122045552215, "grad_norm": 1.2717727422714233, "learning_rate": 0.0001658582394983424, "loss": 1.3947, "step": 2046 }, { "epoch": 0.29322446640882394, "grad_norm": 1.0644125938415527, "learning_rate": 0.0001658233176926409, "loss": 1.518, "step": 2047 }, { "epoch": 0.2933677123621258, "grad_norm": 1.2992795705795288, "learning_rate": 0.000165788381716808, "loss": 1.5907, "step": 2048 }, { "epoch": 0.29351095831542756, "grad_norm": 1.2054922580718994, "learning_rate": 0.00016575343157836455, "loss": 1.2582, "step": 2049 }, { "epoch": 0.2936542042687294, "grad_norm": 1.2525360584259033, "learning_rate": 0.00016571846728483442, "loss": 1.4705, "step": 2050 }, { "epoch": 0.29379745022203124, "grad_norm": 1.3702844381332397, "learning_rate": 0.00016568348884374453, "loss": 1.3646, "step": 2051 }, { "epoch": 0.293940696175333, "grad_norm": 1.1646143198013306, "learning_rate": 0.00016564849626262492, "loss": 1.5212, "step": 2052 }, { "epoch": 0.29408394212863487, "grad_norm": 1.1041207313537598, "learning_rate": 0.00016561348954900865, "loss": 1.5302, "step": 2053 }, { "epoch": 0.2942271880819367, "grad_norm": 1.0390639305114746, "learning_rate": 0.00016557846871043173, "loss": 1.3909, "step": 2054 }, { "epoch": 0.2943704340352385, "grad_norm": 1.0313633680343628, "learning_rate": 0.00016554343375443331, "loss": 1.6806, "step": 2055 }, { "epoch": 0.29451367998854033, "grad_norm": 1.3112484216690063, "learning_rate": 0.00016550838468855553, "loss": 1.578, "step": 2056 }, { "epoch": 0.2946569259418421, "grad_norm": 1.255183458328247, "learning_rate": 0.00016547332152034365, "loss": 1.5373, "step": 2057 }, { "epoch": 0.29480017189514396, "grad_norm": 1.277254343032837, "learning_rate": 0.00016543824425734583, "loss": 1.5696, "step": 2058 }, { "epoch": 0.2949434178484458, "grad_norm": 1.0825320482254028, "learning_rate": 0.00016540315290711338, "loss": 1.344, "step": 2059 }, { "epoch": 0.2950866638017476, "grad_norm": 1.1773914098739624, "learning_rate": 0.0001653680474772006, "loss": 1.5834, "step": 2060 }, { "epoch": 0.2952299097550494, "grad_norm": 1.3338125944137573, "learning_rate": 0.0001653329279751648, "loss": 1.3147, "step": 2061 }, { "epoch": 0.29537315570835126, "grad_norm": 1.0381402969360352, "learning_rate": 0.00016529779440856634, "loss": 1.5587, "step": 2062 }, { "epoch": 0.29551640166165305, "grad_norm": 1.1725177764892578, "learning_rate": 0.0001652626467849686, "loss": 1.5401, "step": 2063 }, { "epoch": 0.2956596476149549, "grad_norm": 1.2530937194824219, "learning_rate": 0.00016522748511193803, "loss": 1.4864, "step": 2064 }, { "epoch": 0.2958028935682567, "grad_norm": 1.280437707901001, "learning_rate": 0.00016519230939704402, "loss": 1.5546, "step": 2065 }, { "epoch": 0.2959461395215585, "grad_norm": 1.2502906322479248, "learning_rate": 0.00016515711964785906, "loss": 1.5615, "step": 2066 }, { "epoch": 0.29608938547486036, "grad_norm": 1.2065811157226562, "learning_rate": 0.00016512191587195866, "loss": 1.5018, "step": 2067 }, { "epoch": 0.29623263142816214, "grad_norm": 1.1206344366073608, "learning_rate": 0.00016508669807692127, "loss": 1.4378, "step": 2068 }, { "epoch": 0.296375877381464, "grad_norm": 1.1130290031433105, "learning_rate": 0.0001650514662703284, "loss": 1.5022, "step": 2069 }, { "epoch": 0.29651912333476577, "grad_norm": 1.1376516819000244, "learning_rate": 0.00016501622045976463, "loss": 1.5937, "step": 2070 }, { "epoch": 0.2966623692880676, "grad_norm": 0.9944444894790649, "learning_rate": 0.00016498096065281753, "loss": 1.5888, "step": 2071 }, { "epoch": 0.29680561524136945, "grad_norm": 1.234916090965271, "learning_rate": 0.00016494568685707762, "loss": 1.3764, "step": 2072 }, { "epoch": 0.29694886119467123, "grad_norm": 1.0958075523376465, "learning_rate": 0.00016491039908013847, "loss": 1.4478, "step": 2073 }, { "epoch": 0.2970921071479731, "grad_norm": 1.1120153665542603, "learning_rate": 0.00016487509732959668, "loss": 1.5635, "step": 2074 }, { "epoch": 0.2972353531012749, "grad_norm": 1.3901543617248535, "learning_rate": 0.00016483978161305188, "loss": 1.4523, "step": 2075 }, { "epoch": 0.2973785990545767, "grad_norm": 1.255515694618225, "learning_rate": 0.0001648044519381066, "loss": 1.5609, "step": 2076 }, { "epoch": 0.29752184500787854, "grad_norm": 1.0801960229873657, "learning_rate": 0.00016476910831236648, "loss": 1.4023, "step": 2077 }, { "epoch": 0.2976650909611803, "grad_norm": 1.359724998474121, "learning_rate": 0.0001647337507434401, "loss": 1.6331, "step": 2078 }, { "epoch": 0.29780833691448216, "grad_norm": 0.9753069877624512, "learning_rate": 0.00016469837923893915, "loss": 1.5339, "step": 2079 }, { "epoch": 0.297951582867784, "grad_norm": 1.0298526287078857, "learning_rate": 0.00016466299380647818, "loss": 1.463, "step": 2080 }, { "epoch": 0.2980948288210858, "grad_norm": 0.9451895356178284, "learning_rate": 0.00016462759445367477, "loss": 1.3969, "step": 2081 }, { "epoch": 0.29823807477438763, "grad_norm": 1.15664803981781, "learning_rate": 0.00016459218118814958, "loss": 1.4679, "step": 2082 }, { "epoch": 0.29838132072768947, "grad_norm": 1.0966644287109375, "learning_rate": 0.00016455675401752618, "loss": 1.2798, "step": 2083 }, { "epoch": 0.29852456668099125, "grad_norm": 1.2720539569854736, "learning_rate": 0.00016452131294943117, "loss": 1.4518, "step": 2084 }, { "epoch": 0.2986678126342931, "grad_norm": 1.0670486688613892, "learning_rate": 0.00016448585799149417, "loss": 1.5698, "step": 2085 }, { "epoch": 0.2988110585875949, "grad_norm": 1.2468509674072266, "learning_rate": 0.00016445038915134766, "loss": 1.5231, "step": 2086 }, { "epoch": 0.2989543045408967, "grad_norm": 1.2199418544769287, "learning_rate": 0.00016441490643662732, "loss": 1.4772, "step": 2087 }, { "epoch": 0.29909755049419856, "grad_norm": 1.2481852769851685, "learning_rate": 0.0001643794098549716, "loss": 1.5118, "step": 2088 }, { "epoch": 0.29924079644750035, "grad_norm": 1.1355080604553223, "learning_rate": 0.0001643438994140221, "loss": 1.6856, "step": 2089 }, { "epoch": 0.2993840424008022, "grad_norm": 0.9006658792495728, "learning_rate": 0.00016430837512142334, "loss": 1.6043, "step": 2090 }, { "epoch": 0.29952728835410397, "grad_norm": 1.1728410720825195, "learning_rate": 0.00016427283698482281, "loss": 1.4023, "step": 2091 }, { "epoch": 0.2996705343074058, "grad_norm": 1.1827518939971924, "learning_rate": 0.00016423728501187098, "loss": 1.6054, "step": 2092 }, { "epoch": 0.29981378026070765, "grad_norm": 0.9066771268844604, "learning_rate": 0.0001642017192102213, "loss": 1.4302, "step": 2093 }, { "epoch": 0.29995702621400944, "grad_norm": 0.9178932309150696, "learning_rate": 0.0001641661395875302, "loss": 1.4626, "step": 2094 }, { "epoch": 0.3001002721673113, "grad_norm": 1.1124746799468994, "learning_rate": 0.0001641305461514571, "loss": 1.5385, "step": 2095 }, { "epoch": 0.3002435181206131, "grad_norm": 1.167476773262024, "learning_rate": 0.00016409493890966442, "loss": 1.4419, "step": 2096 }, { "epoch": 0.3003867640739149, "grad_norm": 1.2050307989120483, "learning_rate": 0.00016405931786981755, "loss": 1.5183, "step": 2097 }, { "epoch": 0.30053001002721674, "grad_norm": 1.2343586683273315, "learning_rate": 0.00016402368303958468, "loss": 1.2939, "step": 2098 }, { "epoch": 0.30067325598051853, "grad_norm": 0.9498979449272156, "learning_rate": 0.00016398803442663724, "loss": 1.3712, "step": 2099 }, { "epoch": 0.30081650193382037, "grad_norm": 1.2889301776885986, "learning_rate": 0.00016395237203864943, "loss": 1.2372, "step": 2100 }, { "epoch": 0.3009597478871222, "grad_norm": 1.2563834190368652, "learning_rate": 0.0001639166958832985, "loss": 1.349, "step": 2101 }, { "epoch": 0.301102993840424, "grad_norm": 1.070231318473816, "learning_rate": 0.00016388100596826465, "loss": 1.3202, "step": 2102 }, { "epoch": 0.30124623979372583, "grad_norm": 0.9682635068893433, "learning_rate": 0.000163845302301231, "loss": 1.4629, "step": 2103 }, { "epoch": 0.3013894857470276, "grad_norm": 1.239965796470642, "learning_rate": 0.00016380958488988368, "loss": 1.4767, "step": 2104 }, { "epoch": 0.30153273170032946, "grad_norm": 1.1767518520355225, "learning_rate": 0.00016377385374191183, "loss": 1.3701, "step": 2105 }, { "epoch": 0.3016759776536313, "grad_norm": 0.9566437602043152, "learning_rate": 0.0001637381088650074, "loss": 1.5249, "step": 2106 }, { "epoch": 0.3018192236069331, "grad_norm": 1.1168581247329712, "learning_rate": 0.0001637023502668654, "loss": 1.5557, "step": 2107 }, { "epoch": 0.3019624695602349, "grad_norm": 1.2364850044250488, "learning_rate": 0.00016366657795518377, "loss": 1.248, "step": 2108 }, { "epoch": 0.30210571551353677, "grad_norm": 1.1146354675292969, "learning_rate": 0.00016363079193766345, "loss": 1.5762, "step": 2109 }, { "epoch": 0.30224896146683855, "grad_norm": 1.2413594722747803, "learning_rate": 0.00016359499222200818, "loss": 1.5448, "step": 2110 }, { "epoch": 0.3023922074201404, "grad_norm": 1.1661057472229004, "learning_rate": 0.00016355917881592485, "loss": 1.6454, "step": 2111 }, { "epoch": 0.3025354533734422, "grad_norm": 1.074952483177185, "learning_rate": 0.00016352335172712317, "loss": 1.6959, "step": 2112 }, { "epoch": 0.302678699326744, "grad_norm": 1.5178353786468506, "learning_rate": 0.0001634875109633158, "loss": 1.5195, "step": 2113 }, { "epoch": 0.30282194528004586, "grad_norm": 1.0164861679077148, "learning_rate": 0.00016345165653221838, "loss": 1.5207, "step": 2114 }, { "epoch": 0.30296519123334764, "grad_norm": 1.103379249572754, "learning_rate": 0.00016341578844154955, "loss": 1.5232, "step": 2115 }, { "epoch": 0.3031084371866495, "grad_norm": 1.0848551988601685, "learning_rate": 0.00016337990669903073, "loss": 1.5539, "step": 2116 }, { "epoch": 0.3032516831399513, "grad_norm": 1.0429604053497314, "learning_rate": 0.00016334401131238644, "loss": 1.6297, "step": 2117 }, { "epoch": 0.3033949290932531, "grad_norm": 1.0036472082138062, "learning_rate": 0.000163308102289344, "loss": 1.3732, "step": 2118 }, { "epoch": 0.30353817504655495, "grad_norm": 1.1866599321365356, "learning_rate": 0.00016327217963763374, "loss": 1.3574, "step": 2119 }, { "epoch": 0.30368142099985673, "grad_norm": 1.2575559616088867, "learning_rate": 0.000163236243364989, "loss": 1.6162, "step": 2120 }, { "epoch": 0.3038246669531586, "grad_norm": 0.9871320724487305, "learning_rate": 0.0001632002934791459, "loss": 1.5079, "step": 2121 }, { "epoch": 0.3039679129064604, "grad_norm": 1.0725313425064087, "learning_rate": 0.0001631643299878436, "loss": 1.5054, "step": 2122 }, { "epoch": 0.3041111588597622, "grad_norm": 1.0979975461959839, "learning_rate": 0.00016312835289882408, "loss": 1.6496, "step": 2123 }, { "epoch": 0.30425440481306404, "grad_norm": 1.101214051246643, "learning_rate": 0.0001630923622198324, "loss": 1.3688, "step": 2124 }, { "epoch": 0.3043976507663658, "grad_norm": 1.305351972579956, "learning_rate": 0.00016305635795861643, "loss": 1.3986, "step": 2125 }, { "epoch": 0.30454089671966766, "grad_norm": 1.2938451766967773, "learning_rate": 0.00016302034012292697, "loss": 1.3696, "step": 2126 }, { "epoch": 0.3046841426729695, "grad_norm": 1.298505425453186, "learning_rate": 0.0001629843087205178, "loss": 1.5394, "step": 2127 }, { "epoch": 0.3048273886262713, "grad_norm": 1.2057178020477295, "learning_rate": 0.0001629482637591456, "loss": 1.4988, "step": 2128 }, { "epoch": 0.30497063457957313, "grad_norm": 1.249611496925354, "learning_rate": 0.00016291220524656993, "loss": 1.5572, "step": 2129 }, { "epoch": 0.30511388053287497, "grad_norm": 1.1235121488571167, "learning_rate": 0.00016287613319055332, "loss": 1.5192, "step": 2130 }, { "epoch": 0.30525712648617676, "grad_norm": 1.4710801839828491, "learning_rate": 0.00016284004759886114, "loss": 1.4539, "step": 2131 }, { "epoch": 0.3054003724394786, "grad_norm": 0.978493332862854, "learning_rate": 0.00016280394847926178, "loss": 1.4619, "step": 2132 }, { "epoch": 0.3055436183927804, "grad_norm": 1.0383172035217285, "learning_rate": 0.0001627678358395265, "loss": 1.4563, "step": 2133 }, { "epoch": 0.3056868643460822, "grad_norm": 1.1714521646499634, "learning_rate": 0.0001627317096874294, "loss": 1.3108, "step": 2134 }, { "epoch": 0.30583011029938406, "grad_norm": 1.1852748394012451, "learning_rate": 0.00016269557003074757, "loss": 1.6014, "step": 2135 }, { "epoch": 0.30597335625268585, "grad_norm": 1.0720202922821045, "learning_rate": 0.00016265941687726099, "loss": 1.4473, "step": 2136 }, { "epoch": 0.3061166022059877, "grad_norm": 1.0016392469406128, "learning_rate": 0.00016262325023475252, "loss": 1.3899, "step": 2137 }, { "epoch": 0.3062598481592895, "grad_norm": 1.2735028266906738, "learning_rate": 0.000162587070111008, "loss": 1.5014, "step": 2138 }, { "epoch": 0.3064030941125913, "grad_norm": 1.3058689832687378, "learning_rate": 0.00016255087651381603, "loss": 1.3919, "step": 2139 }, { "epoch": 0.30654634006589315, "grad_norm": 1.1695564985275269, "learning_rate": 0.00016251466945096826, "loss": 1.3818, "step": 2140 }, { "epoch": 0.30668958601919494, "grad_norm": 1.0100617408752441, "learning_rate": 0.00016247844893025918, "loss": 1.4555, "step": 2141 }, { "epoch": 0.3068328319724968, "grad_norm": 1.0054876804351807, "learning_rate": 0.0001624422149594861, "loss": 1.4446, "step": 2142 }, { "epoch": 0.3069760779257986, "grad_norm": 1.1957088708877563, "learning_rate": 0.00016240596754644936, "loss": 1.3559, "step": 2143 }, { "epoch": 0.3071193238791004, "grad_norm": 1.2647818326950073, "learning_rate": 0.00016236970669895214, "loss": 1.6286, "step": 2144 }, { "epoch": 0.30726256983240224, "grad_norm": 1.10520339012146, "learning_rate": 0.00016233343242480047, "loss": 1.5808, "step": 2145 }, { "epoch": 0.30740581578570403, "grad_norm": 1.2879663705825806, "learning_rate": 0.00016229714473180328, "loss": 1.5655, "step": 2146 }, { "epoch": 0.30754906173900587, "grad_norm": 1.2024028301239014, "learning_rate": 0.00016226084362777247, "loss": 1.4058, "step": 2147 }, { "epoch": 0.3076923076923077, "grad_norm": 1.2346138954162598, "learning_rate": 0.00016222452912052272, "loss": 1.3247, "step": 2148 }, { "epoch": 0.3078355536456095, "grad_norm": 1.369972825050354, "learning_rate": 0.0001621882012178717, "loss": 1.4775, "step": 2149 }, { "epoch": 0.30797879959891133, "grad_norm": 1.1432427167892456, "learning_rate": 0.0001621518599276399, "loss": 1.4419, "step": 2150 }, { "epoch": 0.3081220455522132, "grad_norm": 1.015098214149475, "learning_rate": 0.00016211550525765063, "loss": 1.5148, "step": 2151 }, { "epoch": 0.30826529150551496, "grad_norm": 1.3381190299987793, "learning_rate": 0.0001620791372157302, "loss": 1.4458, "step": 2152 }, { "epoch": 0.3084085374588168, "grad_norm": 1.2559499740600586, "learning_rate": 0.00016204275580970773, "loss": 1.3879, "step": 2153 }, { "epoch": 0.3085517834121186, "grad_norm": 1.3334771394729614, "learning_rate": 0.00016200636104741532, "loss": 1.499, "step": 2154 }, { "epoch": 0.3086950293654204, "grad_norm": 1.1180161237716675, "learning_rate": 0.00016196995293668774, "loss": 1.6303, "step": 2155 }, { "epoch": 0.30883827531872227, "grad_norm": 1.1852434873580933, "learning_rate": 0.0001619335314853628, "loss": 1.4955, "step": 2156 }, { "epoch": 0.30898152127202405, "grad_norm": 1.0549120903015137, "learning_rate": 0.00016189709670128122, "loss": 1.4196, "step": 2157 }, { "epoch": 0.3091247672253259, "grad_norm": 1.0799815654754639, "learning_rate": 0.00016186064859228638, "loss": 1.4356, "step": 2158 }, { "epoch": 0.3092680131786277, "grad_norm": 0.9937320947647095, "learning_rate": 0.0001618241871662247, "loss": 1.4904, "step": 2159 }, { "epoch": 0.3094112591319295, "grad_norm": 1.2357194423675537, "learning_rate": 0.00016178771243094547, "loss": 1.4504, "step": 2160 }, { "epoch": 0.30955450508523136, "grad_norm": 1.3001954555511475, "learning_rate": 0.00016175122439430077, "loss": 1.5392, "step": 2161 }, { "epoch": 0.30969775103853314, "grad_norm": 1.202223300933838, "learning_rate": 0.00016171472306414554, "loss": 1.4394, "step": 2162 }, { "epoch": 0.309840996991835, "grad_norm": 1.0657094717025757, "learning_rate": 0.00016167820844833764, "loss": 1.4745, "step": 2163 }, { "epoch": 0.3099842429451368, "grad_norm": 0.9404382109642029, "learning_rate": 0.00016164168055473775, "loss": 1.5796, "step": 2164 }, { "epoch": 0.3101274888984386, "grad_norm": 1.0849108695983887, "learning_rate": 0.00016160513939120943, "loss": 1.6945, "step": 2165 }, { "epoch": 0.31027073485174045, "grad_norm": 1.1392961740493774, "learning_rate": 0.0001615685849656191, "loss": 1.5728, "step": 2166 }, { "epoch": 0.31041398080504223, "grad_norm": 0.9921726584434509, "learning_rate": 0.00016153201728583602, "loss": 1.6066, "step": 2167 }, { "epoch": 0.3105572267583441, "grad_norm": 1.2363765239715576, "learning_rate": 0.0001614954363597323, "loss": 1.5385, "step": 2168 }, { "epoch": 0.3107004727116459, "grad_norm": 1.2025961875915527, "learning_rate": 0.0001614588421951829, "loss": 1.5741, "step": 2169 }, { "epoch": 0.3108437186649477, "grad_norm": 1.1037356853485107, "learning_rate": 0.00016142223480006563, "loss": 1.4146, "step": 2170 }, { "epoch": 0.31098696461824954, "grad_norm": 1.018072247505188, "learning_rate": 0.0001613856141822612, "loss": 1.6295, "step": 2171 }, { "epoch": 0.3111302105715514, "grad_norm": 1.3242359161376953, "learning_rate": 0.0001613489803496531, "loss": 1.6588, "step": 2172 }, { "epoch": 0.31127345652485316, "grad_norm": 1.4029531478881836, "learning_rate": 0.00016131233331012762, "loss": 1.5318, "step": 2173 }, { "epoch": 0.311416702478155, "grad_norm": 1.3314331769943237, "learning_rate": 0.0001612756730715741, "loss": 1.437, "step": 2174 }, { "epoch": 0.3115599484314568, "grad_norm": 1.1653331518173218, "learning_rate": 0.00016123899964188448, "loss": 1.4383, "step": 2175 }, { "epoch": 0.31170319438475863, "grad_norm": 1.15831458568573, "learning_rate": 0.00016120231302895366, "loss": 1.4641, "step": 2176 }, { "epoch": 0.31184644033806047, "grad_norm": 1.431643009185791, "learning_rate": 0.00016116561324067935, "loss": 1.3607, "step": 2177 }, { "epoch": 0.31198968629136226, "grad_norm": 1.15006422996521, "learning_rate": 0.00016112890028496216, "loss": 1.5628, "step": 2178 }, { "epoch": 0.3121329322446641, "grad_norm": 1.111311435699463, "learning_rate": 0.0001610921741697054, "loss": 1.5959, "step": 2179 }, { "epoch": 0.3122761781979659, "grad_norm": 1.1853933334350586, "learning_rate": 0.00016105543490281535, "loss": 1.6071, "step": 2180 }, { "epoch": 0.3124194241512677, "grad_norm": 1.5622564554214478, "learning_rate": 0.0001610186824922011, "loss": 1.6407, "step": 2181 }, { "epoch": 0.31256267010456956, "grad_norm": 1.170989990234375, "learning_rate": 0.00016098191694577442, "loss": 1.7186, "step": 2182 }, { "epoch": 0.31270591605787135, "grad_norm": 1.3031423091888428, "learning_rate": 0.00016094513827145016, "loss": 1.4696, "step": 2183 }, { "epoch": 0.3128491620111732, "grad_norm": 1.1214048862457275, "learning_rate": 0.00016090834647714575, "loss": 1.5491, "step": 2184 }, { "epoch": 0.31299240796447503, "grad_norm": 1.1624176502227783, "learning_rate": 0.00016087154157078156, "loss": 1.3835, "step": 2185 }, { "epoch": 0.3131356539177768, "grad_norm": 1.067884087562561, "learning_rate": 0.00016083472356028086, "loss": 1.7291, "step": 2186 }, { "epoch": 0.31327889987107865, "grad_norm": 1.4405581951141357, "learning_rate": 0.00016079789245356958, "loss": 1.3773, "step": 2187 }, { "epoch": 0.31342214582438044, "grad_norm": 1.1195565462112427, "learning_rate": 0.00016076104825857657, "loss": 1.5359, "step": 2188 }, { "epoch": 0.3135653917776823, "grad_norm": 1.4966022968292236, "learning_rate": 0.00016072419098323346, "loss": 1.456, "step": 2189 }, { "epoch": 0.3137086377309841, "grad_norm": 1.0616106986999512, "learning_rate": 0.00016068732063547473, "loss": 1.4872, "step": 2190 }, { "epoch": 0.3138518836842859, "grad_norm": 1.1995162963867188, "learning_rate": 0.00016065043722323768, "loss": 1.5527, "step": 2191 }, { "epoch": 0.31399512963758774, "grad_norm": 1.1095688343048096, "learning_rate": 0.0001606135407544623, "loss": 1.4138, "step": 2192 }, { "epoch": 0.3141383755908896, "grad_norm": 1.0261632204055786, "learning_rate": 0.0001605766312370916, "loss": 1.4086, "step": 2193 }, { "epoch": 0.31428162154419137, "grad_norm": 1.0876903533935547, "learning_rate": 0.0001605397086790712, "loss": 1.5084, "step": 2194 }, { "epoch": 0.3144248674974932, "grad_norm": 1.123974323272705, "learning_rate": 0.00016050277308834966, "loss": 1.492, "step": 2195 }, { "epoch": 0.314568113450795, "grad_norm": 1.466103434562683, "learning_rate": 0.00016046582447287828, "loss": 1.3834, "step": 2196 }, { "epoch": 0.31471135940409684, "grad_norm": 1.1065490245819092, "learning_rate": 0.0001604288628406112, "loss": 1.3981, "step": 2197 }, { "epoch": 0.3148546053573987, "grad_norm": 1.269467830657959, "learning_rate": 0.0001603918881995053, "loss": 1.5069, "step": 2198 }, { "epoch": 0.31499785131070046, "grad_norm": 1.3922014236450195, "learning_rate": 0.00016035490055752035, "loss": 1.3686, "step": 2199 }, { "epoch": 0.3151410972640023, "grad_norm": 1.3569414615631104, "learning_rate": 0.0001603178999226189, "loss": 1.3837, "step": 2200 }, { "epoch": 0.3152843432173041, "grad_norm": 1.118282437324524, "learning_rate": 0.0001602808863027662, "loss": 1.4184, "step": 2201 }, { "epoch": 0.3154275891706059, "grad_norm": 1.2146110534667969, "learning_rate": 0.0001602438597059304, "loss": 1.5382, "step": 2202 }, { "epoch": 0.31557083512390777, "grad_norm": 1.2688679695129395, "learning_rate": 0.00016020682014008242, "loss": 1.5125, "step": 2203 }, { "epoch": 0.31571408107720955, "grad_norm": 1.3139230012893677, "learning_rate": 0.00016016976761319595, "loss": 1.3806, "step": 2204 }, { "epoch": 0.3158573270305114, "grad_norm": 1.0200927257537842, "learning_rate": 0.0001601327021332475, "loss": 1.2959, "step": 2205 }, { "epoch": 0.31600057298381323, "grad_norm": 1.351724624633789, "learning_rate": 0.0001600956237082163, "loss": 1.3509, "step": 2206 }, { "epoch": 0.316143818937115, "grad_norm": 1.0325864553451538, "learning_rate": 0.00016005853234608446, "loss": 1.5371, "step": 2207 }, { "epoch": 0.31628706489041686, "grad_norm": 1.2085545063018799, "learning_rate": 0.00016002142805483685, "loss": 1.4455, "step": 2208 }, { "epoch": 0.31643031084371864, "grad_norm": 1.042733907699585, "learning_rate": 0.00015998431084246107, "loss": 1.4025, "step": 2209 }, { "epoch": 0.3165735567970205, "grad_norm": 1.0463552474975586, "learning_rate": 0.0001599471807169475, "loss": 1.5703, "step": 2210 }, { "epoch": 0.3167168027503223, "grad_norm": 1.1998884677886963, "learning_rate": 0.00015991003768628942, "loss": 1.4712, "step": 2211 }, { "epoch": 0.3168600487036241, "grad_norm": 0.9740208983421326, "learning_rate": 0.00015987288175848277, "loss": 1.5639, "step": 2212 }, { "epoch": 0.31700329465692595, "grad_norm": 1.4041082859039307, "learning_rate": 0.0001598357129415263, "loss": 1.474, "step": 2213 }, { "epoch": 0.31714654061022773, "grad_norm": 1.0597376823425293, "learning_rate": 0.00015979853124342154, "loss": 1.5728, "step": 2214 }, { "epoch": 0.3172897865635296, "grad_norm": 1.0904829502105713, "learning_rate": 0.00015976133667217277, "loss": 1.3362, "step": 2215 }, { "epoch": 0.3174330325168314, "grad_norm": 1.0378105640411377, "learning_rate": 0.00015972412923578712, "loss": 1.5321, "step": 2216 }, { "epoch": 0.3175762784701332, "grad_norm": 0.957038938999176, "learning_rate": 0.00015968690894227437, "loss": 1.3742, "step": 2217 }, { "epoch": 0.31771952442343504, "grad_norm": 1.2085511684417725, "learning_rate": 0.0001596496757996471, "loss": 1.4769, "step": 2218 }, { "epoch": 0.3178627703767369, "grad_norm": 1.1358758211135864, "learning_rate": 0.00015961242981592076, "loss": 1.3116, "step": 2219 }, { "epoch": 0.31800601633003867, "grad_norm": 1.0222986936569214, "learning_rate": 0.00015957517099911349, "loss": 1.6448, "step": 2220 }, { "epoch": 0.3181492622833405, "grad_norm": 1.0717604160308838, "learning_rate": 0.00015953789935724613, "loss": 1.7298, "step": 2221 }, { "epoch": 0.3182925082366423, "grad_norm": 1.1606909036636353, "learning_rate": 0.0001595006148983424, "loss": 1.3108, "step": 2222 }, { "epoch": 0.31843575418994413, "grad_norm": 1.3621573448181152, "learning_rate": 0.00015946331763042867, "loss": 1.3623, "step": 2223 }, { "epoch": 0.31857900014324597, "grad_norm": 1.155504584312439, "learning_rate": 0.00015942600756153414, "loss": 1.356, "step": 2224 }, { "epoch": 0.31872224609654776, "grad_norm": 1.0299174785614014, "learning_rate": 0.00015938868469969077, "loss": 1.4444, "step": 2225 }, { "epoch": 0.3188654920498496, "grad_norm": 1.0487732887268066, "learning_rate": 0.00015935134905293322, "loss": 1.6577, "step": 2226 }, { "epoch": 0.31900873800315144, "grad_norm": 1.1563888788223267, "learning_rate": 0.0001593140006292989, "loss": 1.3916, "step": 2227 }, { "epoch": 0.3191519839564532, "grad_norm": 1.2340785264968872, "learning_rate": 0.00015927663943682808, "loss": 1.3571, "step": 2228 }, { "epoch": 0.31929522990975506, "grad_norm": 1.4335360527038574, "learning_rate": 0.00015923926548356364, "loss": 1.4666, "step": 2229 }, { "epoch": 0.31943847586305685, "grad_norm": 0.9405779242515564, "learning_rate": 0.00015920187877755128, "loss": 1.4326, "step": 2230 }, { "epoch": 0.3195817218163587, "grad_norm": 1.0857157707214355, "learning_rate": 0.00015916447932683947, "loss": 1.4451, "step": 2231 }, { "epoch": 0.31972496776966053, "grad_norm": 1.1476625204086304, "learning_rate": 0.00015912706713947932, "loss": 1.5027, "step": 2232 }, { "epoch": 0.3198682137229623, "grad_norm": 1.0132766962051392, "learning_rate": 0.0001590896422235248, "loss": 1.5499, "step": 2233 }, { "epoch": 0.32001145967626415, "grad_norm": 1.142949104309082, "learning_rate": 0.00015905220458703253, "loss": 1.3891, "step": 2234 }, { "epoch": 0.32015470562956594, "grad_norm": 0.990533173084259, "learning_rate": 0.00015901475423806195, "loss": 1.4042, "step": 2235 }, { "epoch": 0.3202979515828678, "grad_norm": 1.1556705236434937, "learning_rate": 0.00015897729118467517, "loss": 1.4879, "step": 2236 }, { "epoch": 0.3204411975361696, "grad_norm": 1.2752677202224731, "learning_rate": 0.00015893981543493705, "loss": 1.4728, "step": 2237 }, { "epoch": 0.3205844434894714, "grad_norm": 1.0377402305603027, "learning_rate": 0.0001589023269969152, "loss": 1.4546, "step": 2238 }, { "epoch": 0.32072768944277324, "grad_norm": 0.9539663791656494, "learning_rate": 0.00015886482587867997, "loss": 1.5698, "step": 2239 }, { "epoch": 0.3208709353960751, "grad_norm": 0.9791962504386902, "learning_rate": 0.0001588273120883044, "loss": 1.4965, "step": 2240 }, { "epoch": 0.32101418134937687, "grad_norm": 1.2567335367202759, "learning_rate": 0.00015878978563386425, "loss": 1.6168, "step": 2241 }, { "epoch": 0.3211574273026787, "grad_norm": 0.9634516835212708, "learning_rate": 0.00015875224652343815, "loss": 1.4496, "step": 2242 }, { "epoch": 0.3213006732559805, "grad_norm": 1.3572815656661987, "learning_rate": 0.00015871469476510722, "loss": 1.3976, "step": 2243 }, { "epoch": 0.32144391920928234, "grad_norm": 1.1213479042053223, "learning_rate": 0.00015867713036695546, "loss": 1.4437, "step": 2244 }, { "epoch": 0.3215871651625842, "grad_norm": 1.2937026023864746, "learning_rate": 0.00015863955333706957, "loss": 1.416, "step": 2245 }, { "epoch": 0.32173041111588596, "grad_norm": 1.1191105842590332, "learning_rate": 0.00015860196368353897, "loss": 1.4718, "step": 2246 }, { "epoch": 0.3218736570691878, "grad_norm": 0.9822176694869995, "learning_rate": 0.00015856436141445577, "loss": 1.5605, "step": 2247 }, { "epoch": 0.32201690302248964, "grad_norm": 0.9939042329788208, "learning_rate": 0.00015852674653791477, "loss": 1.3622, "step": 2248 }, { "epoch": 0.3221601489757914, "grad_norm": 1.1110790967941284, "learning_rate": 0.00015848911906201355, "loss": 1.5047, "step": 2249 }, { "epoch": 0.32230339492909327, "grad_norm": 1.171623945236206, "learning_rate": 0.0001584514789948524, "loss": 1.571, "step": 2250 }, { "epoch": 0.32244664088239505, "grad_norm": 1.0576767921447754, "learning_rate": 0.00015841382634453427, "loss": 1.4366, "step": 2251 }, { "epoch": 0.3225898868356969, "grad_norm": 1.228026032447815, "learning_rate": 0.00015837616111916483, "loss": 1.3506, "step": 2252 }, { "epoch": 0.32273313278899873, "grad_norm": 1.191338300704956, "learning_rate": 0.0001583384833268525, "loss": 1.6866, "step": 2253 }, { "epoch": 0.3228763787423005, "grad_norm": 1.400315284729004, "learning_rate": 0.00015830079297570837, "loss": 1.4451, "step": 2254 }, { "epoch": 0.32301962469560236, "grad_norm": 1.071686029434204, "learning_rate": 0.0001582630900738462, "loss": 1.3625, "step": 2255 }, { "epoch": 0.32316287064890414, "grad_norm": 1.098414421081543, "learning_rate": 0.00015822537462938254, "loss": 1.4156, "step": 2256 }, { "epoch": 0.323306116602206, "grad_norm": 1.3927415609359741, "learning_rate": 0.0001581876466504366, "loss": 1.4602, "step": 2257 }, { "epoch": 0.3234493625555078, "grad_norm": 1.1547175645828247, "learning_rate": 0.00015814990614513024, "loss": 1.3088, "step": 2258 }, { "epoch": 0.3235926085088096, "grad_norm": 1.1632882356643677, "learning_rate": 0.00015811215312158807, "loss": 1.3343, "step": 2259 }, { "epoch": 0.32373585446211145, "grad_norm": 1.2297313213348389, "learning_rate": 0.00015807438758793735, "loss": 1.5297, "step": 2260 }, { "epoch": 0.3238791004154133, "grad_norm": 1.1143958568572998, "learning_rate": 0.00015803660955230817, "loss": 1.4774, "step": 2261 }, { "epoch": 0.3240223463687151, "grad_norm": 1.2539821863174438, "learning_rate": 0.0001579988190228331, "loss": 1.4504, "step": 2262 }, { "epoch": 0.3241655923220169, "grad_norm": 1.1935629844665527, "learning_rate": 0.00015796101600764755, "loss": 1.5425, "step": 2263 }, { "epoch": 0.3243088382753187, "grad_norm": 1.5752179622650146, "learning_rate": 0.00015792320051488955, "loss": 1.4837, "step": 2264 }, { "epoch": 0.32445208422862054, "grad_norm": 1.1541529893875122, "learning_rate": 0.0001578853725526999, "loss": 1.6657, "step": 2265 }, { "epoch": 0.3245953301819224, "grad_norm": 1.1816414594650269, "learning_rate": 0.00015784753212922192, "loss": 1.4886, "step": 2266 }, { "epoch": 0.32473857613522417, "grad_norm": 1.0843379497528076, "learning_rate": 0.0001578096792526018, "loss": 1.5176, "step": 2267 }, { "epoch": 0.324881822088526, "grad_norm": 1.0409349203109741, "learning_rate": 0.00015777181393098833, "loss": 1.4746, "step": 2268 }, { "epoch": 0.3250250680418278, "grad_norm": 1.1670563220977783, "learning_rate": 0.0001577339361725329, "loss": 1.4648, "step": 2269 }, { "epoch": 0.32516831399512963, "grad_norm": 1.6981006860733032, "learning_rate": 0.0001576960459853898, "loss": 1.3805, "step": 2270 }, { "epoch": 0.3253115599484315, "grad_norm": 1.0562732219696045, "learning_rate": 0.00015765814337771565, "loss": 1.4242, "step": 2271 }, { "epoch": 0.32545480590173326, "grad_norm": 1.0475010871887207, "learning_rate": 0.00015762022835767012, "loss": 1.6098, "step": 2272 }, { "epoch": 0.3255980518550351, "grad_norm": 1.4024945497512817, "learning_rate": 0.00015758230093341523, "loss": 1.6323, "step": 2273 }, { "epoch": 0.32574129780833694, "grad_norm": 0.9667457342147827, "learning_rate": 0.00015754436111311594, "loss": 1.4453, "step": 2274 }, { "epoch": 0.3258845437616387, "grad_norm": 1.225041389465332, "learning_rate": 0.0001575064089049397, "loss": 1.2455, "step": 2275 }, { "epoch": 0.32602778971494056, "grad_norm": 1.1015914678573608, "learning_rate": 0.0001574684443170567, "loss": 1.5497, "step": 2276 }, { "epoch": 0.32617103566824235, "grad_norm": 1.1255090236663818, "learning_rate": 0.00015743046735763975, "loss": 1.4143, "step": 2277 }, { "epoch": 0.3263142816215442, "grad_norm": 1.0734444856643677, "learning_rate": 0.00015739247803486434, "loss": 1.269, "step": 2278 }, { "epoch": 0.32645752757484603, "grad_norm": 1.330520749092102, "learning_rate": 0.00015735447635690868, "loss": 1.3547, "step": 2279 }, { "epoch": 0.3266007735281478, "grad_norm": 1.29810631275177, "learning_rate": 0.00015731646233195357, "loss": 1.5087, "step": 2280 }, { "epoch": 0.32674401948144965, "grad_norm": 1.3337150812149048, "learning_rate": 0.00015727843596818243, "loss": 1.4006, "step": 2281 }, { "epoch": 0.3268872654347515, "grad_norm": 0.9875546097755432, "learning_rate": 0.00015724039727378148, "loss": 1.6777, "step": 2282 }, { "epoch": 0.3270305113880533, "grad_norm": 1.3372234106063843, "learning_rate": 0.0001572023462569395, "loss": 1.554, "step": 2283 }, { "epoch": 0.3271737573413551, "grad_norm": 1.1482656002044678, "learning_rate": 0.00015716428292584787, "loss": 1.5611, "step": 2284 }, { "epoch": 0.3273170032946569, "grad_norm": 1.0315511226654053, "learning_rate": 0.00015712620728870074, "loss": 1.5942, "step": 2285 }, { "epoch": 0.32746024924795875, "grad_norm": 1.0177346467971802, "learning_rate": 0.00015708811935369484, "loss": 1.3909, "step": 2286 }, { "epoch": 0.3276034952012606, "grad_norm": 1.2984817028045654, "learning_rate": 0.00015705001912902957, "loss": 1.2198, "step": 2287 }, { "epoch": 0.32774674115456237, "grad_norm": 1.277712106704712, "learning_rate": 0.00015701190662290693, "loss": 1.4536, "step": 2288 }, { "epoch": 0.3278899871078642, "grad_norm": 1.0963908433914185, "learning_rate": 0.0001569737818435316, "loss": 1.5342, "step": 2289 }, { "epoch": 0.328033233061166, "grad_norm": 1.3517429828643799, "learning_rate": 0.00015693564479911097, "loss": 1.4373, "step": 2290 }, { "epoch": 0.32817647901446784, "grad_norm": 1.0697838068008423, "learning_rate": 0.0001568974954978549, "loss": 1.4788, "step": 2291 }, { "epoch": 0.3283197249677697, "grad_norm": 1.1097118854522705, "learning_rate": 0.00015685933394797607, "loss": 1.5807, "step": 2292 }, { "epoch": 0.32846297092107146, "grad_norm": 1.1345523595809937, "learning_rate": 0.00015682116015768965, "loss": 1.3191, "step": 2293 }, { "epoch": 0.3286062168743733, "grad_norm": 1.0295121669769287, "learning_rate": 0.00015678297413521363, "loss": 1.6011, "step": 2294 }, { "epoch": 0.32874946282767514, "grad_norm": 1.204581379890442, "learning_rate": 0.00015674477588876838, "loss": 1.4564, "step": 2295 }, { "epoch": 0.32889270878097693, "grad_norm": 1.2863401174545288, "learning_rate": 0.00015670656542657714, "loss": 1.606, "step": 2296 }, { "epoch": 0.32903595473427877, "grad_norm": 0.9845897555351257, "learning_rate": 0.0001566683427568656, "loss": 1.6463, "step": 2297 }, { "epoch": 0.32917920068758055, "grad_norm": 1.2133171558380127, "learning_rate": 0.00015663010788786221, "loss": 1.453, "step": 2298 }, { "epoch": 0.3293224466408824, "grad_norm": 1.4354416131973267, "learning_rate": 0.00015659186082779797, "loss": 1.5182, "step": 2299 }, { "epoch": 0.32946569259418423, "grad_norm": 1.2099313735961914, "learning_rate": 0.00015655360158490651, "loss": 1.4823, "step": 2300 }, { "epoch": 0.329608938547486, "grad_norm": 1.1526235342025757, "learning_rate": 0.00015651533016742414, "loss": 1.7675, "step": 2301 }, { "epoch": 0.32975218450078786, "grad_norm": 1.070486307144165, "learning_rate": 0.00015647704658358966, "loss": 1.4568, "step": 2302 }, { "epoch": 0.32989543045408964, "grad_norm": 0.9689673781394958, "learning_rate": 0.00015643875084164467, "loss": 1.5509, "step": 2303 }, { "epoch": 0.3300386764073915, "grad_norm": 1.198616862297058, "learning_rate": 0.00015640044294983329, "loss": 1.7433, "step": 2304 }, { "epoch": 0.3301819223606933, "grad_norm": 1.1746795177459717, "learning_rate": 0.00015636212291640224, "loss": 1.4049, "step": 2305 }, { "epoch": 0.3303251683139951, "grad_norm": 1.2097768783569336, "learning_rate": 0.00015632379074960082, "loss": 1.6876, "step": 2306 }, { "epoch": 0.33046841426729695, "grad_norm": 1.1446720361709595, "learning_rate": 0.0001562854464576811, "loss": 1.5306, "step": 2307 }, { "epoch": 0.3306116602205988, "grad_norm": 1.4527058601379395, "learning_rate": 0.00015624709004889758, "loss": 1.7195, "step": 2308 }, { "epoch": 0.3307549061739006, "grad_norm": 1.2266982793807983, "learning_rate": 0.0001562087215315075, "loss": 1.5028, "step": 2309 }, { "epoch": 0.3308981521272024, "grad_norm": 0.8389855027198792, "learning_rate": 0.0001561703409137706, "loss": 1.4199, "step": 2310 }, { "epoch": 0.3310413980805042, "grad_norm": 1.0936599969863892, "learning_rate": 0.0001561319482039493, "loss": 1.3433, "step": 2311 }, { "epoch": 0.33118464403380604, "grad_norm": 1.170125126838684, "learning_rate": 0.0001560935434103086, "loss": 1.3587, "step": 2312 }, { "epoch": 0.3313278899871079, "grad_norm": 1.2092872858047485, "learning_rate": 0.00015605512654111615, "loss": 1.427, "step": 2313 }, { "epoch": 0.33147113594040967, "grad_norm": 0.9599800109863281, "learning_rate": 0.00015601669760464203, "loss": 1.4219, "step": 2314 }, { "epoch": 0.3316143818937115, "grad_norm": 1.182982325553894, "learning_rate": 0.00015597825660915916, "loss": 1.4012, "step": 2315 }, { "epoch": 0.33175762784701335, "grad_norm": 1.0515201091766357, "learning_rate": 0.0001559398035629429, "loss": 1.388, "step": 2316 }, { "epoch": 0.33190087380031513, "grad_norm": 1.3263829946517944, "learning_rate": 0.00015590133847427116, "loss": 1.7417, "step": 2317 }, { "epoch": 0.332044119753617, "grad_norm": 0.9802058935165405, "learning_rate": 0.00015586286135142467, "loss": 1.4557, "step": 2318 }, { "epoch": 0.33218736570691876, "grad_norm": 0.9632196426391602, "learning_rate": 0.00015582437220268647, "loss": 1.6805, "step": 2319 }, { "epoch": 0.3323306116602206, "grad_norm": 0.9994145035743713, "learning_rate": 0.00015578587103634242, "loss": 1.4888, "step": 2320 }, { "epoch": 0.33247385761352244, "grad_norm": 1.0613521337509155, "learning_rate": 0.00015574735786068078, "loss": 1.6683, "step": 2321 }, { "epoch": 0.3326171035668242, "grad_norm": 1.0585310459136963, "learning_rate": 0.00015570883268399257, "loss": 1.4463, "step": 2322 }, { "epoch": 0.33276034952012606, "grad_norm": 1.0785530805587769, "learning_rate": 0.0001556702955145712, "loss": 1.4671, "step": 2323 }, { "epoch": 0.33290359547342785, "grad_norm": 1.5648711919784546, "learning_rate": 0.00015563174636071286, "loss": 1.4255, "step": 2324 }, { "epoch": 0.3330468414267297, "grad_norm": 1.15556800365448, "learning_rate": 0.00015559318523071622, "loss": 1.3972, "step": 2325 }, { "epoch": 0.33319008738003153, "grad_norm": 1.247313380241394, "learning_rate": 0.0001555546121328825, "loss": 1.3692, "step": 2326 }, { "epoch": 0.3333333333333333, "grad_norm": 1.0710923671722412, "learning_rate": 0.00015551602707551557, "loss": 1.5111, "step": 2327 }, { "epoch": 0.33347657928663516, "grad_norm": 1.0387035608291626, "learning_rate": 0.00015547743006692177, "loss": 1.4257, "step": 2328 }, { "epoch": 0.333619825239937, "grad_norm": 1.3260657787322998, "learning_rate": 0.00015543882111541016, "loss": 1.4778, "step": 2329 }, { "epoch": 0.3337630711932388, "grad_norm": 1.0877724885940552, "learning_rate": 0.0001554002002292923, "loss": 1.4053, "step": 2330 }, { "epoch": 0.3339063171465406, "grad_norm": 1.1008830070495605, "learning_rate": 0.00015536156741688222, "loss": 1.628, "step": 2331 }, { "epoch": 0.3340495630998424, "grad_norm": 1.6206791400909424, "learning_rate": 0.00015532292268649668, "loss": 1.3678, "step": 2332 }, { "epoch": 0.33419280905314425, "grad_norm": 1.35749089717865, "learning_rate": 0.0001552842660464549, "loss": 1.5505, "step": 2333 }, { "epoch": 0.3343360550064461, "grad_norm": 0.9546893835067749, "learning_rate": 0.00015524559750507873, "loss": 1.4559, "step": 2334 }, { "epoch": 0.33447930095974787, "grad_norm": 1.160882592201233, "learning_rate": 0.00015520691707069254, "loss": 1.2959, "step": 2335 }, { "epoch": 0.3346225469130497, "grad_norm": 1.0720781087875366, "learning_rate": 0.00015516822475162325, "loss": 1.5045, "step": 2336 }, { "epoch": 0.33476579286635155, "grad_norm": 1.3178445100784302, "learning_rate": 0.0001551295205562004, "loss": 1.4813, "step": 2337 }, { "epoch": 0.33490903881965334, "grad_norm": 1.1616284847259521, "learning_rate": 0.000155090804492756, "loss": 1.5558, "step": 2338 }, { "epoch": 0.3350522847729552, "grad_norm": 1.1865198612213135, "learning_rate": 0.00015505207656962467, "loss": 1.536, "step": 2339 }, { "epoch": 0.33519553072625696, "grad_norm": 1.0836961269378662, "learning_rate": 0.00015501333679514357, "loss": 1.3854, "step": 2340 }, { "epoch": 0.3353387766795588, "grad_norm": 1.137036681175232, "learning_rate": 0.00015497458517765245, "loss": 1.381, "step": 2341 }, { "epoch": 0.33548202263286064, "grad_norm": 1.2110360860824585, "learning_rate": 0.00015493582172549354, "loss": 1.3714, "step": 2342 }, { "epoch": 0.33562526858616243, "grad_norm": 1.1092302799224854, "learning_rate": 0.00015489704644701166, "loss": 1.4093, "step": 2343 }, { "epoch": 0.33576851453946427, "grad_norm": 1.2615810632705688, "learning_rate": 0.00015485825935055418, "loss": 1.4011, "step": 2344 }, { "epoch": 0.33591176049276605, "grad_norm": 1.0309126377105713, "learning_rate": 0.00015481946044447099, "loss": 1.4938, "step": 2345 }, { "epoch": 0.3360550064460679, "grad_norm": 1.0209304094314575, "learning_rate": 0.0001547806497371145, "loss": 1.398, "step": 2346 }, { "epoch": 0.33619825239936973, "grad_norm": 1.1450188159942627, "learning_rate": 0.0001547418272368398, "loss": 1.4812, "step": 2347 }, { "epoch": 0.3363414983526715, "grad_norm": 1.2547285556793213, "learning_rate": 0.00015470299295200434, "loss": 1.491, "step": 2348 }, { "epoch": 0.33648474430597336, "grad_norm": 1.1752949953079224, "learning_rate": 0.00015466414689096816, "loss": 1.4352, "step": 2349 }, { "epoch": 0.3366279902592752, "grad_norm": 1.191088318824768, "learning_rate": 0.00015462528906209387, "loss": 1.4359, "step": 2350 }, { "epoch": 0.336771236212577, "grad_norm": 1.062980055809021, "learning_rate": 0.0001545864194737466, "loss": 1.5149, "step": 2351 }, { "epoch": 0.3369144821658788, "grad_norm": 1.3870859146118164, "learning_rate": 0.00015454753813429407, "loss": 1.2917, "step": 2352 }, { "epoch": 0.3370577281191806, "grad_norm": 1.1073658466339111, "learning_rate": 0.0001545086450521064, "loss": 1.5295, "step": 2353 }, { "epoch": 0.33720097407248245, "grad_norm": 1.0015462636947632, "learning_rate": 0.00015446974023555632, "loss": 1.3628, "step": 2354 }, { "epoch": 0.3373442200257843, "grad_norm": 1.0080305337905884, "learning_rate": 0.00015443082369301912, "loss": 1.3981, "step": 2355 }, { "epoch": 0.3374874659790861, "grad_norm": 1.0880720615386963, "learning_rate": 0.00015439189543287247, "loss": 1.4368, "step": 2356 }, { "epoch": 0.3376307119323879, "grad_norm": 1.1552070379257202, "learning_rate": 0.00015435295546349678, "loss": 1.3068, "step": 2357 }, { "epoch": 0.3377739578856897, "grad_norm": 1.0665514469146729, "learning_rate": 0.0001543140037932748, "loss": 1.5995, "step": 2358 }, { "epoch": 0.33791720383899154, "grad_norm": 1.5568562746047974, "learning_rate": 0.0001542750404305918, "loss": 1.4746, "step": 2359 }, { "epoch": 0.3380604497922934, "grad_norm": 0.9816345572471619, "learning_rate": 0.00015423606538383577, "loss": 1.521, "step": 2360 }, { "epoch": 0.33820369574559517, "grad_norm": 0.9525036215782166, "learning_rate": 0.00015419707866139696, "loss": 1.4592, "step": 2361 }, { "epoch": 0.338346941698897, "grad_norm": 1.0205943584442139, "learning_rate": 0.00015415808027166832, "loss": 1.3816, "step": 2362 }, { "epoch": 0.33849018765219885, "grad_norm": 1.1603480577468872, "learning_rate": 0.00015411907022304516, "loss": 1.3569, "step": 2363 }, { "epoch": 0.33863343360550063, "grad_norm": 1.1628680229187012, "learning_rate": 0.00015408004852392543, "loss": 1.4764, "step": 2364 }, { "epoch": 0.3387766795588025, "grad_norm": 1.1466549634933472, "learning_rate": 0.00015404101518270955, "loss": 1.5823, "step": 2365 }, { "epoch": 0.33891992551210426, "grad_norm": 1.175576090812683, "learning_rate": 0.00015400197020780038, "loss": 1.4831, "step": 2366 }, { "epoch": 0.3390631714654061, "grad_norm": 1.0311682224273682, "learning_rate": 0.0001539629136076034, "loss": 1.4604, "step": 2367 }, { "epoch": 0.33920641741870794, "grad_norm": 1.004539966583252, "learning_rate": 0.00015392384539052642, "loss": 1.4496, "step": 2368 }, { "epoch": 0.3393496633720097, "grad_norm": 1.1400903463363647, "learning_rate": 0.00015388476556498003, "loss": 1.2634, "step": 2369 }, { "epoch": 0.33949290932531156, "grad_norm": 1.298755407333374, "learning_rate": 0.000153845674139377, "loss": 1.3708, "step": 2370 }, { "epoch": 0.3396361552786134, "grad_norm": 1.0015780925750732, "learning_rate": 0.0001538065711221328, "loss": 1.6126, "step": 2371 }, { "epoch": 0.3397794012319152, "grad_norm": 0.9902368783950806, "learning_rate": 0.00015376745652166535, "loss": 1.55, "step": 2372 }, { "epoch": 0.33992264718521703, "grad_norm": 1.1694645881652832, "learning_rate": 0.0001537283303463951, "loss": 1.3771, "step": 2373 }, { "epoch": 0.3400658931385188, "grad_norm": 1.2839577198028564, "learning_rate": 0.00015368919260474483, "loss": 1.4093, "step": 2374 }, { "epoch": 0.34020913909182066, "grad_norm": 1.049832820892334, "learning_rate": 0.00015365004330514, "loss": 1.5767, "step": 2375 }, { "epoch": 0.3403523850451225, "grad_norm": 1.3304349184036255, "learning_rate": 0.0001536108824560085, "loss": 1.5181, "step": 2376 }, { "epoch": 0.3404956309984243, "grad_norm": 1.0783777236938477, "learning_rate": 0.00015357171006578067, "loss": 1.4845, "step": 2377 }, { "epoch": 0.3406388769517261, "grad_norm": 0.9464433789253235, "learning_rate": 0.00015353252614288935, "loss": 1.5138, "step": 2378 }, { "epoch": 0.3407821229050279, "grad_norm": 0.9589561820030212, "learning_rate": 0.0001534933306957699, "loss": 1.5157, "step": 2379 }, { "epoch": 0.34092536885832975, "grad_norm": 0.9670271277427673, "learning_rate": 0.00015345412373286008, "loss": 1.5581, "step": 2380 }, { "epoch": 0.3410686148116316, "grad_norm": 1.1025387048721313, "learning_rate": 0.00015341490526260017, "loss": 1.512, "step": 2381 }, { "epoch": 0.3412118607649334, "grad_norm": 1.3430148363113403, "learning_rate": 0.00015337567529343302, "loss": 1.4264, "step": 2382 }, { "epoch": 0.3413551067182352, "grad_norm": 1.1018345355987549, "learning_rate": 0.00015333643383380383, "loss": 1.5427, "step": 2383 }, { "epoch": 0.34149835267153705, "grad_norm": 1.0232715606689453, "learning_rate": 0.0001532971808921603, "loss": 1.4403, "step": 2384 }, { "epoch": 0.34164159862483884, "grad_norm": 1.0198861360549927, "learning_rate": 0.00015325791647695258, "loss": 1.5244, "step": 2385 }, { "epoch": 0.3417848445781407, "grad_norm": 1.1815303564071655, "learning_rate": 0.00015321864059663343, "loss": 1.5148, "step": 2386 }, { "epoch": 0.34192809053144246, "grad_norm": 1.2060497999191284, "learning_rate": 0.00015317935325965789, "loss": 1.6608, "step": 2387 }, { "epoch": 0.3420713364847443, "grad_norm": 1.074978232383728, "learning_rate": 0.00015314005447448356, "loss": 1.4645, "step": 2388 }, { "epoch": 0.34221458243804614, "grad_norm": 1.297765851020813, "learning_rate": 0.00015310074424957054, "loss": 1.5866, "step": 2389 }, { "epoch": 0.34235782839134793, "grad_norm": 1.212799310684204, "learning_rate": 0.00015306142259338129, "loss": 1.5388, "step": 2390 }, { "epoch": 0.34250107434464977, "grad_norm": 1.194262981414795, "learning_rate": 0.00015302208951438085, "loss": 1.3649, "step": 2391 }, { "epoch": 0.3426443202979516, "grad_norm": 1.2334723472595215, "learning_rate": 0.00015298274502103664, "loss": 1.3383, "step": 2392 }, { "epoch": 0.3427875662512534, "grad_norm": 1.1157070398330688, "learning_rate": 0.0001529433891218185, "loss": 1.4272, "step": 2393 }, { "epoch": 0.34293081220455524, "grad_norm": 1.2604163885116577, "learning_rate": 0.0001529040218251989, "loss": 1.4221, "step": 2394 }, { "epoch": 0.343074058157857, "grad_norm": 1.0845160484313965, "learning_rate": 0.00015286464313965254, "loss": 1.4989, "step": 2395 }, { "epoch": 0.34321730411115886, "grad_norm": 1.1471657752990723, "learning_rate": 0.0001528252530736567, "loss": 1.5288, "step": 2396 }, { "epoch": 0.3433605500644607, "grad_norm": 1.1633987426757812, "learning_rate": 0.00015278585163569107, "loss": 1.3403, "step": 2397 }, { "epoch": 0.3435037960177625, "grad_norm": 0.9312422275543213, "learning_rate": 0.0001527464388342379, "loss": 1.497, "step": 2398 }, { "epoch": 0.3436470419710643, "grad_norm": 1.208841323852539, "learning_rate": 0.00015270701467778167, "loss": 1.6432, "step": 2399 }, { "epoch": 0.3437902879243661, "grad_norm": 1.2214096784591675, "learning_rate": 0.00015266757917480948, "loss": 1.7182, "step": 2400 }, { "epoch": 0.34393353387766795, "grad_norm": 1.165998935699463, "learning_rate": 0.00015262813233381082, "loss": 1.5253, "step": 2401 }, { "epoch": 0.3440767798309698, "grad_norm": 1.301129937171936, "learning_rate": 0.00015258867416327758, "loss": 1.6513, "step": 2402 }, { "epoch": 0.3442200257842716, "grad_norm": 1.3936134576797485, "learning_rate": 0.0001525492046717042, "loss": 1.342, "step": 2403 }, { "epoch": 0.3443632717375734, "grad_norm": 1.006881594657898, "learning_rate": 0.00015250972386758745, "loss": 1.3893, "step": 2404 }, { "epoch": 0.34450651769087526, "grad_norm": 1.1349140405654907, "learning_rate": 0.00015247023175942657, "loss": 1.5365, "step": 2405 }, { "epoch": 0.34464976364417704, "grad_norm": 1.3732683658599854, "learning_rate": 0.00015243072835572318, "loss": 1.5158, "step": 2406 }, { "epoch": 0.3447930095974789, "grad_norm": 1.371100664138794, "learning_rate": 0.00015239121366498147, "loss": 1.4272, "step": 2407 }, { "epoch": 0.34493625555078067, "grad_norm": 1.141140341758728, "learning_rate": 0.00015235168769570792, "loss": 1.4489, "step": 2408 }, { "epoch": 0.3450795015040825, "grad_norm": 1.2566807270050049, "learning_rate": 0.0001523121504564115, "loss": 1.3978, "step": 2409 }, { "epoch": 0.34522274745738435, "grad_norm": 1.0878827571868896, "learning_rate": 0.00015227260195560367, "loss": 1.4564, "step": 2410 }, { "epoch": 0.34536599341068613, "grad_norm": 1.0803250074386597, "learning_rate": 0.00015223304220179812, "loss": 1.5442, "step": 2411 }, { "epoch": 0.345509239363988, "grad_norm": 1.2518631219863892, "learning_rate": 0.00015219347120351123, "loss": 1.6102, "step": 2412 }, { "epoch": 0.34565248531728976, "grad_norm": 1.0459473133087158, "learning_rate": 0.00015215388896926152, "loss": 1.4808, "step": 2413 }, { "epoch": 0.3457957312705916, "grad_norm": 1.0294448137283325, "learning_rate": 0.00015211429550757012, "loss": 1.5121, "step": 2414 }, { "epoch": 0.34593897722389344, "grad_norm": 1.2770122289657593, "learning_rate": 0.00015207469082696053, "loss": 1.4725, "step": 2415 }, { "epoch": 0.3460822231771952, "grad_norm": 0.9999268651008606, "learning_rate": 0.00015203507493595866, "loss": 1.4866, "step": 2416 }, { "epoch": 0.34622546913049707, "grad_norm": 0.9365148544311523, "learning_rate": 0.00015199544784309286, "loss": 1.5976, "step": 2417 }, { "epoch": 0.3463687150837989, "grad_norm": 1.1144593954086304, "learning_rate": 0.00015195580955689378, "loss": 1.269, "step": 2418 }, { "epoch": 0.3465119610371007, "grad_norm": 1.1058090925216675, "learning_rate": 0.00015191616008589461, "loss": 1.4277, "step": 2419 }, { "epoch": 0.34665520699040253, "grad_norm": 0.8692079186439514, "learning_rate": 0.00015187649943863095, "loss": 1.6169, "step": 2420 }, { "epoch": 0.3467984529437043, "grad_norm": 1.1412423849105835, "learning_rate": 0.00015183682762364066, "loss": 1.477, "step": 2421 }, { "epoch": 0.34694169889700616, "grad_norm": 0.9700475931167603, "learning_rate": 0.0001517971446494641, "loss": 1.5336, "step": 2422 }, { "epoch": 0.347084944850308, "grad_norm": 1.2372982501983643, "learning_rate": 0.00015175745052464415, "loss": 1.4172, "step": 2423 }, { "epoch": 0.3472281908036098, "grad_norm": 1.3604100942611694, "learning_rate": 0.00015171774525772592, "loss": 1.4789, "step": 2424 }, { "epoch": 0.3473714367569116, "grad_norm": 1.130306601524353, "learning_rate": 0.00015167802885725687, "loss": 1.5878, "step": 2425 }, { "epoch": 0.34751468271021346, "grad_norm": 1.3036110401153564, "learning_rate": 0.0001516383013317871, "loss": 1.4351, "step": 2426 }, { "epoch": 0.34765792866351525, "grad_norm": 1.4934039115905762, "learning_rate": 0.00015159856268986887, "loss": 1.4764, "step": 2427 }, { "epoch": 0.3478011746168171, "grad_norm": 1.0250859260559082, "learning_rate": 0.00015155881294005697, "loss": 1.5109, "step": 2428 }, { "epoch": 0.3479444205701189, "grad_norm": 1.089347243309021, "learning_rate": 0.00015151905209090854, "loss": 1.3593, "step": 2429 }, { "epoch": 0.3480876665234207, "grad_norm": 1.2811094522476196, "learning_rate": 0.0001514792801509831, "loss": 1.4851, "step": 2430 }, { "epoch": 0.34823091247672255, "grad_norm": 1.446021318435669, "learning_rate": 0.00015143949712884252, "loss": 1.4453, "step": 2431 }, { "epoch": 0.34837415843002434, "grad_norm": 1.3093568086624146, "learning_rate": 0.00015139970303305119, "loss": 1.429, "step": 2432 }, { "epoch": 0.3485174043833262, "grad_norm": 1.2506701946258545, "learning_rate": 0.00015135989787217567, "loss": 1.2644, "step": 2433 }, { "epoch": 0.34866065033662796, "grad_norm": 1.0441100597381592, "learning_rate": 0.00015132008165478516, "loss": 1.3778, "step": 2434 }, { "epoch": 0.3488038962899298, "grad_norm": 0.9274334907531738, "learning_rate": 0.00015128025438945102, "loss": 1.5475, "step": 2435 }, { "epoch": 0.34894714224323164, "grad_norm": 1.1874804496765137, "learning_rate": 0.0001512404160847471, "loss": 1.5333, "step": 2436 }, { "epoch": 0.34909038819653343, "grad_norm": 1.101256251335144, "learning_rate": 0.0001512005667492496, "loss": 1.4267, "step": 2437 }, { "epoch": 0.34923363414983527, "grad_norm": 0.9447908997535706, "learning_rate": 0.0001511607063915371, "loss": 1.4612, "step": 2438 }, { "epoch": 0.3493768801031371, "grad_norm": 0.9819499254226685, "learning_rate": 0.00015112083502019056, "loss": 1.5346, "step": 2439 }, { "epoch": 0.3495201260564389, "grad_norm": 1.075096607208252, "learning_rate": 0.00015108095264379325, "loss": 1.6348, "step": 2440 }, { "epoch": 0.34966337200974074, "grad_norm": 1.1388366222381592, "learning_rate": 0.00015104105927093092, "loss": 1.2484, "step": 2441 }, { "epoch": 0.3498066179630425, "grad_norm": 1.578209400177002, "learning_rate": 0.0001510011549101916, "loss": 1.508, "step": 2442 }, { "epoch": 0.34994986391634436, "grad_norm": 1.0529425144195557, "learning_rate": 0.00015096123957016565, "loss": 1.5795, "step": 2443 }, { "epoch": 0.3500931098696462, "grad_norm": 1.1954271793365479, "learning_rate": 0.000150921313259446, "loss": 1.4587, "step": 2444 }, { "epoch": 0.350236355822948, "grad_norm": 1.1856555938720703, "learning_rate": 0.0001508813759866277, "loss": 1.4059, "step": 2445 }, { "epoch": 0.3503796017762498, "grad_norm": 1.0036766529083252, "learning_rate": 0.00015084142776030824, "loss": 1.4509, "step": 2446 }, { "epoch": 0.35052284772955167, "grad_norm": 0.9608394503593445, "learning_rate": 0.0001508014685890875, "loss": 1.4979, "step": 2447 }, { "epoch": 0.35066609368285345, "grad_norm": 1.2596564292907715, "learning_rate": 0.00015076149848156775, "loss": 1.6497, "step": 2448 }, { "epoch": 0.3508093396361553, "grad_norm": 0.9816955924034119, "learning_rate": 0.00015072151744635352, "loss": 1.3047, "step": 2449 }, { "epoch": 0.3509525855894571, "grad_norm": 1.047676682472229, "learning_rate": 0.00015068152549205173, "loss": 1.4998, "step": 2450 }, { "epoch": 0.3510958315427589, "grad_norm": 1.022405743598938, "learning_rate": 0.0001506415226272717, "loss": 1.4455, "step": 2451 }, { "epoch": 0.35123907749606076, "grad_norm": 1.0790386199951172, "learning_rate": 0.000150601508860625, "loss": 1.4453, "step": 2452 }, { "epoch": 0.35138232344936254, "grad_norm": 0.9402562379837036, "learning_rate": 0.00015056148420072564, "loss": 1.3687, "step": 2453 }, { "epoch": 0.3515255694026644, "grad_norm": 1.2030194997787476, "learning_rate": 0.00015052144865618995, "loss": 1.3809, "step": 2454 }, { "epoch": 0.35166881535596617, "grad_norm": 1.1288782358169556, "learning_rate": 0.0001504814022356366, "loss": 1.588, "step": 2455 }, { "epoch": 0.351812061309268, "grad_norm": 1.2761846780776978, "learning_rate": 0.0001504413449476865, "loss": 1.6743, "step": 2456 }, { "epoch": 0.35195530726256985, "grad_norm": 0.9686523079872131, "learning_rate": 0.00015040127680096313, "loss": 1.4878, "step": 2457 }, { "epoch": 0.35209855321587163, "grad_norm": 1.1332261562347412, "learning_rate": 0.00015036119780409207, "loss": 1.4049, "step": 2458 }, { "epoch": 0.3522417991691735, "grad_norm": 1.0721514225006104, "learning_rate": 0.00015032110796570137, "loss": 1.3662, "step": 2459 }, { "epoch": 0.3523850451224753, "grad_norm": 1.0312167406082153, "learning_rate": 0.00015028100729442138, "loss": 1.216, "step": 2460 }, { "epoch": 0.3525282910757771, "grad_norm": 1.0709669589996338, "learning_rate": 0.00015024089579888478, "loss": 1.3847, "step": 2461 }, { "epoch": 0.35267153702907894, "grad_norm": 1.0403696298599243, "learning_rate": 0.0001502007734877266, "loss": 1.5603, "step": 2462 }, { "epoch": 0.3528147829823807, "grad_norm": 1.136157512664795, "learning_rate": 0.00015016064036958414, "loss": 1.2572, "step": 2463 }, { "epoch": 0.35295802893568257, "grad_norm": 1.1979122161865234, "learning_rate": 0.00015012049645309712, "loss": 1.4414, "step": 2464 }, { "epoch": 0.3531012748889844, "grad_norm": 1.1076048612594604, "learning_rate": 0.0001500803417469075, "loss": 1.3954, "step": 2465 }, { "epoch": 0.3532445208422862, "grad_norm": 1.1115305423736572, "learning_rate": 0.00015004017625965958, "loss": 1.5678, "step": 2466 }, { "epoch": 0.35338776679558803, "grad_norm": 1.1326771974563599, "learning_rate": 0.00015000000000000001, "loss": 1.448, "step": 2467 }, { "epoch": 0.3535310127488898, "grad_norm": 1.0988236665725708, "learning_rate": 0.00014995981297657776, "loss": 1.5052, "step": 2468 }, { "epoch": 0.35367425870219166, "grad_norm": 1.1608054637908936, "learning_rate": 0.00014991961519804408, "loss": 1.4011, "step": 2469 }, { "epoch": 0.3538175046554935, "grad_norm": 1.0278706550598145, "learning_rate": 0.00014987940667305258, "loss": 1.6579, "step": 2470 }, { "epoch": 0.3539607506087953, "grad_norm": 0.9136257767677307, "learning_rate": 0.00014983918741025916, "loss": 1.2434, "step": 2471 }, { "epoch": 0.3541039965620971, "grad_norm": 1.0890923738479614, "learning_rate": 0.00014979895741832198, "loss": 1.3587, "step": 2472 }, { "epoch": 0.35424724251539896, "grad_norm": 1.4404780864715576, "learning_rate": 0.00014975871670590163, "loss": 1.6476, "step": 2473 }, { "epoch": 0.35439048846870075, "grad_norm": 1.3689135313034058, "learning_rate": 0.0001497184652816609, "loss": 1.5361, "step": 2474 }, { "epoch": 0.3545337344220026, "grad_norm": 1.3381685018539429, "learning_rate": 0.0001496782031542649, "loss": 1.4194, "step": 2475 }, { "epoch": 0.3546769803753044, "grad_norm": 1.2028056383132935, "learning_rate": 0.0001496379303323812, "loss": 1.6739, "step": 2476 }, { "epoch": 0.3548202263286062, "grad_norm": 1.3352667093276978, "learning_rate": 0.00014959764682467933, "loss": 1.4492, "step": 2477 }, { "epoch": 0.35496347228190805, "grad_norm": 1.2482444047927856, "learning_rate": 0.00014955735263983154, "loss": 1.569, "step": 2478 }, { "epoch": 0.35510671823520984, "grad_norm": 1.0980826616287231, "learning_rate": 0.00014951704778651202, "loss": 1.506, "step": 2479 }, { "epoch": 0.3552499641885117, "grad_norm": 1.3518425226211548, "learning_rate": 0.00014947673227339755, "loss": 1.453, "step": 2480 }, { "epoch": 0.3553932101418135, "grad_norm": 1.0751659870147705, "learning_rate": 0.00014943640610916688, "loss": 1.468, "step": 2481 }, { "epoch": 0.3555364560951153, "grad_norm": 1.1745363473892212, "learning_rate": 0.00014939606930250142, "loss": 1.3486, "step": 2482 }, { "epoch": 0.35567970204841715, "grad_norm": 0.9499922394752502, "learning_rate": 0.00014935572186208456, "loss": 1.3732, "step": 2483 }, { "epoch": 0.35582294800171893, "grad_norm": 1.098778486251831, "learning_rate": 0.00014931536379660213, "loss": 1.611, "step": 2484 }, { "epoch": 0.35596619395502077, "grad_norm": 1.059660792350769, "learning_rate": 0.00014927499511474228, "loss": 1.4004, "step": 2485 }, { "epoch": 0.3561094399083226, "grad_norm": 1.108597993850708, "learning_rate": 0.00014923461582519532, "loss": 1.4137, "step": 2486 }, { "epoch": 0.3562526858616244, "grad_norm": 1.218572974205017, "learning_rate": 0.00014919422593665397, "loss": 1.3978, "step": 2487 }, { "epoch": 0.35639593181492624, "grad_norm": 1.0528897047042847, "learning_rate": 0.00014915382545781315, "loss": 1.59, "step": 2488 }, { "epoch": 0.356539177768228, "grad_norm": 1.4149610996246338, "learning_rate": 0.00014911341439737002, "loss": 1.3505, "step": 2489 }, { "epoch": 0.35668242372152986, "grad_norm": 1.1221262216567993, "learning_rate": 0.00014907299276402418, "loss": 1.5635, "step": 2490 }, { "epoch": 0.3568256696748317, "grad_norm": 1.222782850265503, "learning_rate": 0.00014903256056647736, "loss": 1.4488, "step": 2491 }, { "epoch": 0.3569689156281335, "grad_norm": 1.1195868253707886, "learning_rate": 0.0001489921178134336, "loss": 1.5141, "step": 2492 }, { "epoch": 0.35711216158143533, "grad_norm": 1.2428107261657715, "learning_rate": 0.0001489516645135993, "loss": 1.4349, "step": 2493 }, { "epoch": 0.35725540753473717, "grad_norm": 1.0416040420532227, "learning_rate": 0.00014891120067568294, "loss": 1.4898, "step": 2494 }, { "epoch": 0.35739865348803895, "grad_norm": 1.1553033590316772, "learning_rate": 0.00014887072630839546, "loss": 1.457, "step": 2495 }, { "epoch": 0.3575418994413408, "grad_norm": 0.9414494037628174, "learning_rate": 0.00014883024142044995, "loss": 1.5649, "step": 2496 }, { "epoch": 0.3576851453946426, "grad_norm": 1.128084659576416, "learning_rate": 0.00014878974602056181, "loss": 1.3616, "step": 2497 }, { "epoch": 0.3578283913479444, "grad_norm": 1.351125717163086, "learning_rate": 0.00014874924011744876, "loss": 1.3059, "step": 2498 }, { "epoch": 0.35797163730124626, "grad_norm": 1.060027837753296, "learning_rate": 0.00014870872371983062, "loss": 1.4246, "step": 2499 }, { "epoch": 0.35811488325454804, "grad_norm": 1.2175381183624268, "learning_rate": 0.00014866819683642966, "loss": 1.391, "step": 2500 }, { "epoch": 0.3582581292078499, "grad_norm": 1.1252983808517456, "learning_rate": 0.00014862765947597025, "loss": 1.3124, "step": 2501 }, { "epoch": 0.3584013751611517, "grad_norm": 1.0511078834533691, "learning_rate": 0.00014858711164717912, "loss": 1.5625, "step": 2502 }, { "epoch": 0.3585446211144535, "grad_norm": 1.038171648979187, "learning_rate": 0.00014854655335878517, "loss": 1.5304, "step": 2503 }, { "epoch": 0.35868786706775535, "grad_norm": 1.052503228187561, "learning_rate": 0.00014850598461951963, "loss": 1.3118, "step": 2504 }, { "epoch": 0.35883111302105714, "grad_norm": 1.0193835496902466, "learning_rate": 0.00014846540543811596, "loss": 1.4586, "step": 2505 }, { "epoch": 0.358974358974359, "grad_norm": 1.1015459299087524, "learning_rate": 0.0001484248158233098, "loss": 1.3647, "step": 2506 }, { "epoch": 0.3591176049276608, "grad_norm": 1.1612577438354492, "learning_rate": 0.00014838421578383914, "loss": 1.4625, "step": 2507 }, { "epoch": 0.3592608508809626, "grad_norm": 1.0630440711975098, "learning_rate": 0.00014834360532844413, "loss": 1.3518, "step": 2508 }, { "epoch": 0.35940409683426444, "grad_norm": 0.9513444304466248, "learning_rate": 0.0001483029844658672, "loss": 1.4603, "step": 2509 }, { "epoch": 0.3595473427875662, "grad_norm": 1.243257761001587, "learning_rate": 0.00014826235320485306, "loss": 1.4823, "step": 2510 }, { "epoch": 0.35969058874086807, "grad_norm": 0.9204735159873962, "learning_rate": 0.00014822171155414856, "loss": 1.585, "step": 2511 }, { "epoch": 0.3598338346941699, "grad_norm": 1.1022237539291382, "learning_rate": 0.00014818105952250292, "loss": 1.3876, "step": 2512 }, { "epoch": 0.3599770806474717, "grad_norm": 1.1986637115478516, "learning_rate": 0.0001481403971186674, "loss": 1.3128, "step": 2513 }, { "epoch": 0.36012032660077353, "grad_norm": 1.3053021430969238, "learning_rate": 0.00014809972435139568, "loss": 1.4194, "step": 2514 }, { "epoch": 0.3602635725540754, "grad_norm": 1.248697280883789, "learning_rate": 0.0001480590412294436, "loss": 1.2864, "step": 2515 }, { "epoch": 0.36040681850737716, "grad_norm": 0.9447510242462158, "learning_rate": 0.00014801834776156925, "loss": 1.6017, "step": 2516 }, { "epoch": 0.360550064460679, "grad_norm": 1.0043679475784302, "learning_rate": 0.00014797764395653283, "loss": 1.4786, "step": 2517 }, { "epoch": 0.3606933104139808, "grad_norm": 1.1880528926849365, "learning_rate": 0.000147936929823097, "loss": 1.3449, "step": 2518 }, { "epoch": 0.3608365563672826, "grad_norm": 1.0238244533538818, "learning_rate": 0.00014789620537002639, "loss": 1.4868, "step": 2519 }, { "epoch": 0.36097980232058446, "grad_norm": 1.4210186004638672, "learning_rate": 0.000147855470606088, "loss": 1.5587, "step": 2520 }, { "epoch": 0.36112304827388625, "grad_norm": 1.2720956802368164, "learning_rate": 0.00014781472554005107, "loss": 1.4834, "step": 2521 }, { "epoch": 0.3612662942271881, "grad_norm": 1.132568597793579, "learning_rate": 0.0001477739701806869, "loss": 1.4971, "step": 2522 }, { "epoch": 0.3614095401804899, "grad_norm": 0.9899293780326843, "learning_rate": 0.00014773320453676924, "loss": 1.3419, "step": 2523 }, { "epoch": 0.3615527861337917, "grad_norm": 1.2437946796417236, "learning_rate": 0.00014769242861707382, "loss": 1.4447, "step": 2524 }, { "epoch": 0.36169603208709356, "grad_norm": 1.1225162744522095, "learning_rate": 0.00014765164243037875, "loss": 1.4871, "step": 2525 }, { "epoch": 0.36183927804039534, "grad_norm": 1.014938473701477, "learning_rate": 0.0001476108459854642, "loss": 1.4887, "step": 2526 }, { "epoch": 0.3619825239936972, "grad_norm": 1.2254246473312378, "learning_rate": 0.00014757003929111276, "loss": 1.4382, "step": 2527 }, { "epoch": 0.362125769946999, "grad_norm": 1.2509281635284424, "learning_rate": 0.000147529222356109, "loss": 1.4276, "step": 2528 }, { "epoch": 0.3622690159003008, "grad_norm": 1.1786195039749146, "learning_rate": 0.00014748839518923985, "loss": 1.4581, "step": 2529 }, { "epoch": 0.36241226185360265, "grad_norm": 1.2477809190750122, "learning_rate": 0.00014744755779929437, "loss": 1.5629, "step": 2530 }, { "epoch": 0.36255550780690443, "grad_norm": 1.0423568487167358, "learning_rate": 0.00014740671019506383, "loss": 1.4088, "step": 2531 }, { "epoch": 0.36269875376020627, "grad_norm": 0.9987547397613525, "learning_rate": 0.00014736585238534172, "loss": 1.5414, "step": 2532 }, { "epoch": 0.3628419997135081, "grad_norm": 0.8615167737007141, "learning_rate": 0.00014732498437892373, "loss": 1.5009, "step": 2533 }, { "epoch": 0.3629852456668099, "grad_norm": 1.2286875247955322, "learning_rate": 0.0001472841061846077, "loss": 1.4962, "step": 2534 }, { "epoch": 0.36312849162011174, "grad_norm": 0.9477217197418213, "learning_rate": 0.0001472432178111937, "loss": 1.4642, "step": 2535 }, { "epoch": 0.3632717375734136, "grad_norm": 1.3960607051849365, "learning_rate": 0.000147202319267484, "loss": 1.5182, "step": 2536 }, { "epoch": 0.36341498352671536, "grad_norm": 1.061038851737976, "learning_rate": 0.00014716141056228305, "loss": 1.495, "step": 2537 }, { "epoch": 0.3635582294800172, "grad_norm": 1.4418344497680664, "learning_rate": 0.00014712049170439748, "loss": 1.2843, "step": 2538 }, { "epoch": 0.363701475433319, "grad_norm": 1.023033857345581, "learning_rate": 0.00014707956270263605, "loss": 1.4014, "step": 2539 }, { "epoch": 0.36384472138662083, "grad_norm": 1.067755937576294, "learning_rate": 0.00014703862356580985, "loss": 1.4649, "step": 2540 }, { "epoch": 0.36398796733992267, "grad_norm": 1.038650631904602, "learning_rate": 0.000146997674302732, "loss": 1.4716, "step": 2541 }, { "epoch": 0.36413121329322445, "grad_norm": 1.2082433700561523, "learning_rate": 0.00014695671492221792, "loss": 1.4394, "step": 2542 }, { "epoch": 0.3642744592465263, "grad_norm": 0.9982503652572632, "learning_rate": 0.0001469157454330851, "loss": 1.5247, "step": 2543 }, { "epoch": 0.3644177051998281, "grad_norm": 0.9793524146080017, "learning_rate": 0.00014687476584415325, "loss": 1.3516, "step": 2544 }, { "epoch": 0.3645609511531299, "grad_norm": 1.0603388547897339, "learning_rate": 0.00014683377616424428, "loss": 1.6374, "step": 2545 }, { "epoch": 0.36470419710643176, "grad_norm": 0.996548056602478, "learning_rate": 0.0001467927764021823, "loss": 1.3796, "step": 2546 }, { "epoch": 0.36484744305973354, "grad_norm": 1.0186673402786255, "learning_rate": 0.00014675176656679345, "loss": 1.3512, "step": 2547 }, { "epoch": 0.3649906890130354, "grad_norm": 1.0116450786590576, "learning_rate": 0.0001467107466669062, "loss": 1.4934, "step": 2548 }, { "epoch": 0.3651339349663372, "grad_norm": 1.010191559791565, "learning_rate": 0.00014666971671135112, "loss": 1.3588, "step": 2549 }, { "epoch": 0.365277180919639, "grad_norm": 1.325577974319458, "learning_rate": 0.00014662867670896094, "loss": 1.3208, "step": 2550 }, { "epoch": 0.36542042687294085, "grad_norm": 1.0123873949050903, "learning_rate": 0.00014658762666857052, "loss": 1.5638, "step": 2551 }, { "epoch": 0.36556367282624264, "grad_norm": 1.0866594314575195, "learning_rate": 0.00014654656659901695, "loss": 1.4287, "step": 2552 }, { "epoch": 0.3657069187795445, "grad_norm": 1.2401455640792847, "learning_rate": 0.00014650549650913945, "loss": 1.4929, "step": 2553 }, { "epoch": 0.3658501647328463, "grad_norm": 1.2523385286331177, "learning_rate": 0.00014646441640777936, "loss": 1.3921, "step": 2554 }, { "epoch": 0.3659934106861481, "grad_norm": 1.0730689764022827, "learning_rate": 0.0001464233263037803, "loss": 1.4909, "step": 2555 }, { "epoch": 0.36613665663944994, "grad_norm": 1.4879099130630493, "learning_rate": 0.00014638222620598777, "loss": 1.4058, "step": 2556 }, { "epoch": 0.3662799025927517, "grad_norm": 1.0155384540557861, "learning_rate": 0.00014634111612324982, "loss": 1.4914, "step": 2557 }, { "epoch": 0.36642314854605357, "grad_norm": 1.0073775053024292, "learning_rate": 0.0001462999960644163, "loss": 1.5356, "step": 2558 }, { "epoch": 0.3665663944993554, "grad_norm": 0.9533057808876038, "learning_rate": 0.00014625886603833937, "loss": 1.4409, "step": 2559 }, { "epoch": 0.3667096404526572, "grad_norm": 1.0768482685089111, "learning_rate": 0.0001462177260538733, "loss": 1.7473, "step": 2560 }, { "epoch": 0.36685288640595903, "grad_norm": 1.1366655826568604, "learning_rate": 0.00014617657611987455, "loss": 1.3921, "step": 2561 }, { "epoch": 0.3669961323592609, "grad_norm": 1.1692472696304321, "learning_rate": 0.00014613541624520165, "loss": 1.4196, "step": 2562 }, { "epoch": 0.36713937831256266, "grad_norm": 0.9287104606628418, "learning_rate": 0.0001460942464387153, "loss": 1.4302, "step": 2563 }, { "epoch": 0.3672826242658645, "grad_norm": 1.336983561515808, "learning_rate": 0.0001460530667092783, "loss": 1.3601, "step": 2564 }, { "epoch": 0.3674258702191663, "grad_norm": 1.1765611171722412, "learning_rate": 0.00014601187706575572, "loss": 1.4402, "step": 2565 }, { "epoch": 0.3675691161724681, "grad_norm": 1.1721441745758057, "learning_rate": 0.00014597067751701465, "loss": 1.5211, "step": 2566 }, { "epoch": 0.36771236212576996, "grad_norm": 1.1252217292785645, "learning_rate": 0.00014592946807192426, "loss": 1.548, "step": 2567 }, { "epoch": 0.36785560807907175, "grad_norm": 1.0455622673034668, "learning_rate": 0.000145888248739356, "loss": 1.4642, "step": 2568 }, { "epoch": 0.3679988540323736, "grad_norm": 0.9194879531860352, "learning_rate": 0.00014584701952818333, "loss": 1.4646, "step": 2569 }, { "epoch": 0.36814209998567543, "grad_norm": 1.1840286254882812, "learning_rate": 0.00014580578044728188, "loss": 1.431, "step": 2570 }, { "epoch": 0.3682853459389772, "grad_norm": 1.011466383934021, "learning_rate": 0.00014576453150552947, "loss": 1.5775, "step": 2571 }, { "epoch": 0.36842859189227906, "grad_norm": 1.0589476823806763, "learning_rate": 0.00014572327271180586, "loss": 1.2801, "step": 2572 }, { "epoch": 0.36857183784558084, "grad_norm": 1.0883053541183472, "learning_rate": 0.00014568200407499314, "loss": 1.4236, "step": 2573 }, { "epoch": 0.3687150837988827, "grad_norm": 1.0136991739273071, "learning_rate": 0.00014564072560397542, "loss": 1.4541, "step": 2574 }, { "epoch": 0.3688583297521845, "grad_norm": 1.1944260597229004, "learning_rate": 0.00014559943730763889, "loss": 1.5655, "step": 2575 }, { "epoch": 0.3690015757054863, "grad_norm": 1.1560734510421753, "learning_rate": 0.0001455581391948719, "loss": 1.425, "step": 2576 }, { "epoch": 0.36914482165878815, "grad_norm": 1.2052932977676392, "learning_rate": 0.00014551683127456494, "loss": 1.474, "step": 2577 }, { "epoch": 0.36928806761208993, "grad_norm": 0.9979791641235352, "learning_rate": 0.0001454755135556106, "loss": 1.3691, "step": 2578 }, { "epoch": 0.3694313135653918, "grad_norm": 1.409983515739441, "learning_rate": 0.0001454341860469035, "loss": 1.3881, "step": 2579 }, { "epoch": 0.3695745595186936, "grad_norm": 1.2113723754882812, "learning_rate": 0.0001453928487573405, "loss": 1.4127, "step": 2580 }, { "epoch": 0.3697178054719954, "grad_norm": 0.9448793530464172, "learning_rate": 0.0001453515016958204, "loss": 1.3904, "step": 2581 }, { "epoch": 0.36986105142529724, "grad_norm": 1.1924959421157837, "learning_rate": 0.00014531014487124432, "loss": 1.3369, "step": 2582 }, { "epoch": 0.3700042973785991, "grad_norm": 1.3233561515808105, "learning_rate": 0.00014526877829251528, "loss": 1.4517, "step": 2583 }, { "epoch": 0.37014754333190086, "grad_norm": 1.2325234413146973, "learning_rate": 0.00014522740196853853, "loss": 1.4014, "step": 2584 }, { "epoch": 0.3702907892852027, "grad_norm": 1.2031364440917969, "learning_rate": 0.0001451860159082213, "loss": 1.4983, "step": 2585 }, { "epoch": 0.3704340352385045, "grad_norm": 1.0210764408111572, "learning_rate": 0.00014514462012047306, "loss": 1.4157, "step": 2586 }, { "epoch": 0.37057728119180633, "grad_norm": 0.9976498484611511, "learning_rate": 0.00014510321461420523, "loss": 1.5828, "step": 2587 }, { "epoch": 0.37072052714510817, "grad_norm": 1.2574971914291382, "learning_rate": 0.00014506179939833142, "loss": 1.521, "step": 2588 }, { "epoch": 0.37086377309840995, "grad_norm": 0.9519497156143188, "learning_rate": 0.00014502037448176734, "loss": 1.6336, "step": 2589 }, { "epoch": 0.3710070190517118, "grad_norm": 0.994592547416687, "learning_rate": 0.0001449789398734307, "loss": 1.2408, "step": 2590 }, { "epoch": 0.37115026500501364, "grad_norm": 1.2184882164001465, "learning_rate": 0.00014493749558224138, "loss": 1.4688, "step": 2591 }, { "epoch": 0.3712935109583154, "grad_norm": 1.0672271251678467, "learning_rate": 0.00014489604161712128, "loss": 1.4611, "step": 2592 }, { "epoch": 0.37143675691161726, "grad_norm": 1.1550872325897217, "learning_rate": 0.0001448545779869944, "loss": 1.3873, "step": 2593 }, { "epoch": 0.37158000286491905, "grad_norm": 1.1462234258651733, "learning_rate": 0.00014481310470078687, "loss": 1.5359, "step": 2594 }, { "epoch": 0.3717232488182209, "grad_norm": 1.1339164972305298, "learning_rate": 0.00014477162176742688, "loss": 1.4471, "step": 2595 }, { "epoch": 0.3718664947715227, "grad_norm": 1.2721680402755737, "learning_rate": 0.00014473012919584462, "loss": 1.4699, "step": 2596 }, { "epoch": 0.3720097407248245, "grad_norm": 1.2865521907806396, "learning_rate": 0.00014468862699497243, "loss": 1.7196, "step": 2597 }, { "epoch": 0.37215298667812635, "grad_norm": 0.9609204530715942, "learning_rate": 0.00014464711517374475, "loss": 1.4763, "step": 2598 }, { "epoch": 0.37229623263142814, "grad_norm": 1.0643188953399658, "learning_rate": 0.000144605593741098, "loss": 1.4065, "step": 2599 }, { "epoch": 0.37243947858473, "grad_norm": 1.081160545349121, "learning_rate": 0.00014456406270597073, "loss": 1.4513, "step": 2600 }, { "epoch": 0.3725827245380318, "grad_norm": 1.2300240993499756, "learning_rate": 0.00014452252207730354, "loss": 1.5522, "step": 2601 }, { "epoch": 0.3727259704913336, "grad_norm": 1.0324400663375854, "learning_rate": 0.00014448097186403914, "loss": 1.3137, "step": 2602 }, { "epoch": 0.37286921644463544, "grad_norm": 1.4336881637573242, "learning_rate": 0.0001444394120751222, "loss": 1.4175, "step": 2603 }, { "epoch": 0.3730124623979373, "grad_norm": 1.417158603668213, "learning_rate": 0.0001443978427194996, "loss": 1.4492, "step": 2604 }, { "epoch": 0.37315570835123907, "grad_norm": 1.1301422119140625, "learning_rate": 0.0001443562638061201, "loss": 1.3633, "step": 2605 }, { "epoch": 0.3732989543045409, "grad_norm": 1.3131352663040161, "learning_rate": 0.00014431467534393463, "loss": 1.315, "step": 2606 }, { "epoch": 0.3734422002578427, "grad_norm": 1.1095761060714722, "learning_rate": 0.0001442730773418962, "loss": 1.3941, "step": 2607 }, { "epoch": 0.37358544621114453, "grad_norm": 0.9951792359352112, "learning_rate": 0.0001442314698089598, "loss": 1.5122, "step": 2608 }, { "epoch": 0.3737286921644464, "grad_norm": 1.4373977184295654, "learning_rate": 0.00014418985275408254, "loss": 1.3482, "step": 2609 }, { "epoch": 0.37387193811774816, "grad_norm": 1.4116655588150024, "learning_rate": 0.00014414822618622345, "loss": 1.2703, "step": 2610 }, { "epoch": 0.37401518407105, "grad_norm": 1.2531754970550537, "learning_rate": 0.00014410659011434383, "loss": 1.5549, "step": 2611 }, { "epoch": 0.3741584300243518, "grad_norm": 1.1813033819198608, "learning_rate": 0.00014406494454740677, "loss": 1.444, "step": 2612 }, { "epoch": 0.3743016759776536, "grad_norm": 1.1966707706451416, "learning_rate": 0.0001440232894943776, "loss": 1.4333, "step": 2613 }, { "epoch": 0.37444492193095547, "grad_norm": 1.2637412548065186, "learning_rate": 0.00014398162496422363, "loss": 1.3979, "step": 2614 }, { "epoch": 0.37458816788425725, "grad_norm": 1.1271389722824097, "learning_rate": 0.00014393995096591416, "loss": 1.3936, "step": 2615 }, { "epoch": 0.3747314138375591, "grad_norm": 1.082987666130066, "learning_rate": 0.0001438982675084206, "loss": 1.5254, "step": 2616 }, { "epoch": 0.37487465979086093, "grad_norm": 1.1699860095977783, "learning_rate": 0.00014385657460071639, "loss": 1.3008, "step": 2617 }, { "epoch": 0.3750179057441627, "grad_norm": 1.2353825569152832, "learning_rate": 0.0001438148722517769, "loss": 1.6122, "step": 2618 }, { "epoch": 0.37516115169746456, "grad_norm": 1.0600746870040894, "learning_rate": 0.0001437731604705797, "loss": 1.438, "step": 2619 }, { "epoch": 0.37530439765076634, "grad_norm": 1.2206144332885742, "learning_rate": 0.00014373143926610425, "loss": 1.2056, "step": 2620 }, { "epoch": 0.3754476436040682, "grad_norm": 1.0833669900894165, "learning_rate": 0.0001436897086473321, "loss": 1.3367, "step": 2621 }, { "epoch": 0.37559088955737, "grad_norm": 1.298516869544983, "learning_rate": 0.00014364796862324685, "loss": 1.5758, "step": 2622 }, { "epoch": 0.3757341355106718, "grad_norm": 1.0810481309890747, "learning_rate": 0.00014360621920283406, "loss": 1.4324, "step": 2623 }, { "epoch": 0.37587738146397365, "grad_norm": 1.0148545503616333, "learning_rate": 0.00014356446039508138, "loss": 1.5731, "step": 2624 }, { "epoch": 0.3760206274172755, "grad_norm": 1.1309711933135986, "learning_rate": 0.00014352269220897844, "loss": 1.3841, "step": 2625 }, { "epoch": 0.3761638733705773, "grad_norm": 1.2754181623458862, "learning_rate": 0.00014348091465351683, "loss": 1.4201, "step": 2626 }, { "epoch": 0.3763071193238791, "grad_norm": 1.0657199621200562, "learning_rate": 0.00014343912773769036, "loss": 1.477, "step": 2627 }, { "epoch": 0.3764503652771809, "grad_norm": 1.155584454536438, "learning_rate": 0.00014339733147049458, "loss": 1.559, "step": 2628 }, { "epoch": 0.37659361123048274, "grad_norm": 0.9936962723731995, "learning_rate": 0.0001433555258609273, "loss": 1.5949, "step": 2629 }, { "epoch": 0.3767368571837846, "grad_norm": 1.0808379650115967, "learning_rate": 0.0001433137109179881, "loss": 1.643, "step": 2630 }, { "epoch": 0.37688010313708636, "grad_norm": 1.1006921529769897, "learning_rate": 0.00014327188665067887, "loss": 1.4214, "step": 2631 }, { "epoch": 0.3770233490903882, "grad_norm": 1.2167447805404663, "learning_rate": 0.00014323005306800322, "loss": 1.5325, "step": 2632 }, { "epoch": 0.37716659504369, "grad_norm": 1.020889163017273, "learning_rate": 0.00014318821017896693, "loss": 1.4068, "step": 2633 }, { "epoch": 0.37730984099699183, "grad_norm": 1.093817949295044, "learning_rate": 0.00014314635799257775, "loss": 1.5973, "step": 2634 }, { "epoch": 0.37745308695029367, "grad_norm": 1.0956398248672485, "learning_rate": 0.00014310449651784536, "loss": 1.4462, "step": 2635 }, { "epoch": 0.37759633290359546, "grad_norm": 0.9416504502296448, "learning_rate": 0.00014306262576378157, "loss": 1.5593, "step": 2636 }, { "epoch": 0.3777395788568973, "grad_norm": 1.121124267578125, "learning_rate": 0.00014302074573940008, "loss": 1.3173, "step": 2637 }, { "epoch": 0.37788282481019914, "grad_norm": 1.0044233798980713, "learning_rate": 0.00014297885645371663, "loss": 1.6411, "step": 2638 }, { "epoch": 0.3780260707635009, "grad_norm": 0.9457429051399231, "learning_rate": 0.00014293695791574895, "loss": 1.5112, "step": 2639 }, { "epoch": 0.37816931671680276, "grad_norm": 1.0443371534347534, "learning_rate": 0.00014289505013451677, "loss": 1.4621, "step": 2640 }, { "epoch": 0.37831256267010455, "grad_norm": 1.0153977870941162, "learning_rate": 0.00014285313311904177, "loss": 1.472, "step": 2641 }, { "epoch": 0.3784558086234064, "grad_norm": 1.3298951387405396, "learning_rate": 0.00014281120687834764, "loss": 1.445, "step": 2642 }, { "epoch": 0.3785990545767082, "grad_norm": 1.1219143867492676, "learning_rate": 0.0001427692714214601, "loss": 1.3953, "step": 2643 }, { "epoch": 0.37874230053001, "grad_norm": 1.1074706315994263, "learning_rate": 0.0001427273267574068, "loss": 1.4122, "step": 2644 }, { "epoch": 0.37888554648331185, "grad_norm": 1.0301778316497803, "learning_rate": 0.0001426853728952174, "loss": 1.3201, "step": 2645 }, { "epoch": 0.3790287924366137, "grad_norm": 1.1880346536636353, "learning_rate": 0.0001426434098439235, "loss": 1.4723, "step": 2646 }, { "epoch": 0.3791720383899155, "grad_norm": 1.14169180393219, "learning_rate": 0.0001426014376125587, "loss": 1.4408, "step": 2647 }, { "epoch": 0.3793152843432173, "grad_norm": 1.1174323558807373, "learning_rate": 0.00014255945621015863, "loss": 1.5359, "step": 2648 }, { "epoch": 0.3794585302965191, "grad_norm": 1.0801656246185303, "learning_rate": 0.00014251746564576082, "loss": 1.4907, "step": 2649 }, { "epoch": 0.37960177624982094, "grad_norm": 0.8966488242149353, "learning_rate": 0.0001424754659284048, "loss": 1.3018, "step": 2650 }, { "epoch": 0.3797450222031228, "grad_norm": 1.1092829704284668, "learning_rate": 0.00014243345706713205, "loss": 1.3202, "step": 2651 }, { "epoch": 0.37988826815642457, "grad_norm": 1.1917495727539062, "learning_rate": 0.0001423914390709861, "loss": 1.4054, "step": 2652 }, { "epoch": 0.3800315141097264, "grad_norm": 0.9716973304748535, "learning_rate": 0.0001423494119490123, "loss": 1.5545, "step": 2653 }, { "epoch": 0.3801747600630282, "grad_norm": 0.965319037437439, "learning_rate": 0.00014230737571025812, "loss": 1.4721, "step": 2654 }, { "epoch": 0.38031800601633003, "grad_norm": 1.1608227491378784, "learning_rate": 0.00014226533036377286, "loss": 1.3839, "step": 2655 }, { "epoch": 0.3804612519696319, "grad_norm": 1.4573674201965332, "learning_rate": 0.00014222327591860792, "loss": 1.4683, "step": 2656 }, { "epoch": 0.38060449792293366, "grad_norm": 1.3096299171447754, "learning_rate": 0.00014218121238381652, "loss": 1.3719, "step": 2657 }, { "epoch": 0.3807477438762355, "grad_norm": 1.1627764701843262, "learning_rate": 0.0001421391397684539, "loss": 1.4346, "step": 2658 }, { "epoch": 0.38089098982953734, "grad_norm": 1.2307974100112915, "learning_rate": 0.00014209705808157733, "loss": 1.5122, "step": 2659 }, { "epoch": 0.3810342357828391, "grad_norm": 1.1243153810501099, "learning_rate": 0.00014205496733224582, "loss": 1.5507, "step": 2660 }, { "epoch": 0.38117748173614097, "grad_norm": 1.220015048980713, "learning_rate": 0.00014201286752952056, "loss": 1.5266, "step": 2661 }, { "epoch": 0.38132072768944275, "grad_norm": 1.0911564826965332, "learning_rate": 0.00014197075868246461, "loss": 1.4817, "step": 2662 }, { "epoch": 0.3814639736427446, "grad_norm": 1.0004096031188965, "learning_rate": 0.0001419286408001429, "loss": 1.5482, "step": 2663 }, { "epoch": 0.38160721959604643, "grad_norm": 1.1626720428466797, "learning_rate": 0.00014188651389162243, "loss": 1.5116, "step": 2664 }, { "epoch": 0.3817504655493482, "grad_norm": 1.1454691886901855, "learning_rate": 0.00014184437796597202, "loss": 1.4704, "step": 2665 }, { "epoch": 0.38189371150265006, "grad_norm": 1.0907492637634277, "learning_rate": 0.00014180223303226255, "loss": 1.5029, "step": 2666 }, { "epoch": 0.38203695745595184, "grad_norm": 1.0236293077468872, "learning_rate": 0.0001417600790995667, "loss": 1.5527, "step": 2667 }, { "epoch": 0.3821802034092537, "grad_norm": 1.1283305883407593, "learning_rate": 0.00014171791617695927, "loss": 1.3616, "step": 2668 }, { "epoch": 0.3823234493625555, "grad_norm": 1.5132068395614624, "learning_rate": 0.00014167574427351683, "loss": 1.4162, "step": 2669 }, { "epoch": 0.3824666953158573, "grad_norm": 1.174524188041687, "learning_rate": 0.00014163356339831797, "loss": 1.4974, "step": 2670 }, { "epoch": 0.38260994126915915, "grad_norm": 1.099651575088501, "learning_rate": 0.00014159137356044318, "loss": 1.4267, "step": 2671 }, { "epoch": 0.382753187222461, "grad_norm": 0.9730185866355896, "learning_rate": 0.00014154917476897486, "loss": 1.4964, "step": 2672 }, { "epoch": 0.3828964331757628, "grad_norm": 1.252204418182373, "learning_rate": 0.00014150696703299744, "loss": 1.502, "step": 2673 }, { "epoch": 0.3830396791290646, "grad_norm": 1.2112748622894287, "learning_rate": 0.00014146475036159713, "loss": 1.5899, "step": 2674 }, { "epoch": 0.3831829250823664, "grad_norm": 0.9682261347770691, "learning_rate": 0.00014142252476386218, "loss": 1.5922, "step": 2675 }, { "epoch": 0.38332617103566824, "grad_norm": 1.1810462474822998, "learning_rate": 0.00014138029024888263, "loss": 1.5129, "step": 2676 }, { "epoch": 0.3834694169889701, "grad_norm": 1.1216154098510742, "learning_rate": 0.00014133804682575068, "loss": 1.3655, "step": 2677 }, { "epoch": 0.38361266294227186, "grad_norm": 1.2673734426498413, "learning_rate": 0.00014129579450356016, "loss": 1.3562, "step": 2678 }, { "epoch": 0.3837559088955737, "grad_norm": 1.0625911951065063, "learning_rate": 0.00014125353329140703, "loss": 1.5199, "step": 2679 }, { "epoch": 0.38389915484887555, "grad_norm": 1.0846940279006958, "learning_rate": 0.000141211263198389, "loss": 1.5895, "step": 2680 }, { "epoch": 0.38404240080217733, "grad_norm": 1.1811267137527466, "learning_rate": 0.00014116898423360586, "loss": 1.4873, "step": 2681 }, { "epoch": 0.38418564675547917, "grad_norm": 1.2285857200622559, "learning_rate": 0.00014112669640615918, "loss": 1.4566, "step": 2682 }, { "epoch": 0.38432889270878096, "grad_norm": 1.1813266277313232, "learning_rate": 0.00014108439972515248, "loss": 1.4262, "step": 2683 }, { "epoch": 0.3844721386620828, "grad_norm": 1.395022988319397, "learning_rate": 0.00014104209419969123, "loss": 1.4795, "step": 2684 }, { "epoch": 0.38461538461538464, "grad_norm": 1.0140002965927124, "learning_rate": 0.00014099977983888267, "loss": 1.4915, "step": 2685 }, { "epoch": 0.3847586305686864, "grad_norm": 1.1237748861312866, "learning_rate": 0.00014095745665183618, "loss": 1.4766, "step": 2686 }, { "epoch": 0.38490187652198826, "grad_norm": 0.9617596864700317, "learning_rate": 0.00014091512464766277, "loss": 1.3649, "step": 2687 }, { "epoch": 0.38504512247529005, "grad_norm": 0.9155746698379517, "learning_rate": 0.00014087278383547553, "loss": 1.4637, "step": 2688 }, { "epoch": 0.3851883684285919, "grad_norm": 1.0307685136795044, "learning_rate": 0.00014083043422438935, "loss": 1.4593, "step": 2689 }, { "epoch": 0.38533161438189373, "grad_norm": 1.1862280368804932, "learning_rate": 0.00014078807582352108, "loss": 1.4602, "step": 2690 }, { "epoch": 0.3854748603351955, "grad_norm": 1.167630910873413, "learning_rate": 0.00014074570864198947, "loss": 1.3992, "step": 2691 }, { "epoch": 0.38561810628849735, "grad_norm": 0.993311882019043, "learning_rate": 0.00014070333268891504, "loss": 1.5854, "step": 2692 }, { "epoch": 0.3857613522417992, "grad_norm": 1.0830954313278198, "learning_rate": 0.00014066094797342036, "loss": 1.5101, "step": 2693 }, { "epoch": 0.385904598195101, "grad_norm": 1.4246561527252197, "learning_rate": 0.00014061855450462978, "loss": 1.424, "step": 2694 }, { "epoch": 0.3860478441484028, "grad_norm": 0.9677397608757019, "learning_rate": 0.0001405761522916696, "loss": 1.5294, "step": 2695 }, { "epoch": 0.3861910901017046, "grad_norm": 1.0421785116195679, "learning_rate": 0.00014053374134366788, "loss": 1.4412, "step": 2696 }, { "epoch": 0.38633433605500644, "grad_norm": 1.1148985624313354, "learning_rate": 0.0001404913216697547, "loss": 1.539, "step": 2697 }, { "epoch": 0.3864775820083083, "grad_norm": 1.039480209350586, "learning_rate": 0.00014044889327906202, "loss": 1.5344, "step": 2698 }, { "epoch": 0.38662082796161007, "grad_norm": 1.0137301683425903, "learning_rate": 0.00014040645618072355, "loss": 1.5247, "step": 2699 }, { "epoch": 0.3867640739149119, "grad_norm": 1.2161946296691895, "learning_rate": 0.00014036401038387497, "loss": 1.3838, "step": 2700 }, { "epoch": 0.38690731986821375, "grad_norm": 0.9870474338531494, "learning_rate": 0.00014032155589765378, "loss": 1.5133, "step": 2701 }, { "epoch": 0.38705056582151554, "grad_norm": 0.9808529615402222, "learning_rate": 0.00014027909273119944, "loss": 1.3012, "step": 2702 }, { "epoch": 0.3871938117748174, "grad_norm": 0.986569881439209, "learning_rate": 0.00014023662089365319, "loss": 1.5178, "step": 2703 }, { "epoch": 0.38733705772811916, "grad_norm": 1.529737114906311, "learning_rate": 0.00014019414039415817, "loss": 1.4995, "step": 2704 }, { "epoch": 0.387480303681421, "grad_norm": 1.100267767906189, "learning_rate": 0.00014015165124185933, "loss": 1.3576, "step": 2705 }, { "epoch": 0.38762354963472284, "grad_norm": 1.2582354545593262, "learning_rate": 0.00014010915344590363, "loss": 1.4032, "step": 2706 }, { "epoch": 0.3877667955880246, "grad_norm": 1.3152642250061035, "learning_rate": 0.0001400666470154397, "loss": 1.5016, "step": 2707 }, { "epoch": 0.38791004154132647, "grad_norm": 1.2205884456634521, "learning_rate": 0.00014002413195961819, "loss": 1.3578, "step": 2708 }, { "epoch": 0.38805328749462825, "grad_norm": 1.118962287902832, "learning_rate": 0.0001399816082875915, "loss": 1.4669, "step": 2709 }, { "epoch": 0.3881965334479301, "grad_norm": 1.0975672006607056, "learning_rate": 0.0001399390760085139, "loss": 1.5507, "step": 2710 }, { "epoch": 0.38833977940123193, "grad_norm": 0.8327113389968872, "learning_rate": 0.00013989653513154165, "loss": 1.5713, "step": 2711 }, { "epoch": 0.3884830253545337, "grad_norm": 1.1631824970245361, "learning_rate": 0.00013985398566583262, "loss": 1.4363, "step": 2712 }, { "epoch": 0.38862627130783556, "grad_norm": 1.2302311658859253, "learning_rate": 0.00013981142762054674, "loss": 1.3535, "step": 2713 }, { "epoch": 0.3887695172611374, "grad_norm": 1.2719974517822266, "learning_rate": 0.00013976886100484562, "loss": 1.4649, "step": 2714 }, { "epoch": 0.3889127632144392, "grad_norm": 1.0551207065582275, "learning_rate": 0.00013972628582789294, "loss": 1.4807, "step": 2715 }, { "epoch": 0.389056009167741, "grad_norm": 1.2198597192764282, "learning_rate": 0.00013968370209885392, "loss": 1.4442, "step": 2716 }, { "epoch": 0.3891992551210428, "grad_norm": 1.2216465473175049, "learning_rate": 0.0001396411098268959, "loss": 1.4289, "step": 2717 }, { "epoch": 0.38934250107434465, "grad_norm": 1.29093599319458, "learning_rate": 0.00013959850902118786, "loss": 1.2032, "step": 2718 }, { "epoch": 0.3894857470276465, "grad_norm": 1.2781002521514893, "learning_rate": 0.00013955589969090075, "loss": 1.4226, "step": 2719 }, { "epoch": 0.3896289929809483, "grad_norm": 1.0744190216064453, "learning_rate": 0.00013951328184520732, "loss": 1.544, "step": 2720 }, { "epoch": 0.3897722389342501, "grad_norm": 1.1971217393875122, "learning_rate": 0.0001394706554932821, "loss": 1.4484, "step": 2721 }, { "epoch": 0.3899154848875519, "grad_norm": 1.0613747835159302, "learning_rate": 0.00013942802064430146, "loss": 1.5457, "step": 2722 }, { "epoch": 0.39005873084085374, "grad_norm": 1.0913233757019043, "learning_rate": 0.0001393853773074437, "loss": 1.3368, "step": 2723 }, { "epoch": 0.3902019767941556, "grad_norm": 1.1087960004806519, "learning_rate": 0.00013934272549188888, "loss": 1.3487, "step": 2724 }, { "epoch": 0.39034522274745737, "grad_norm": 1.001875638961792, "learning_rate": 0.0001393000652068188, "loss": 1.5003, "step": 2725 }, { "epoch": 0.3904884687007592, "grad_norm": 1.1012027263641357, "learning_rate": 0.0001392573964614172, "loss": 1.459, "step": 2726 }, { "epoch": 0.39063171465406105, "grad_norm": 1.2598809003829956, "learning_rate": 0.00013921471926486961, "loss": 1.4978, "step": 2727 }, { "epoch": 0.39077496060736283, "grad_norm": 1.0599154233932495, "learning_rate": 0.0001391720336263634, "loss": 1.5258, "step": 2728 }, { "epoch": 0.39091820656066467, "grad_norm": 1.2188338041305542, "learning_rate": 0.0001391293395550877, "loss": 1.3219, "step": 2729 }, { "epoch": 0.39106145251396646, "grad_norm": 1.2200278043746948, "learning_rate": 0.00013908663706023347, "loss": 1.4668, "step": 2730 }, { "epoch": 0.3912046984672683, "grad_norm": 0.9860569834709167, "learning_rate": 0.00013904392615099356, "loss": 1.4106, "step": 2731 }, { "epoch": 0.39134794442057014, "grad_norm": 1.3161883354187012, "learning_rate": 0.00013900120683656253, "loss": 1.3509, "step": 2732 }, { "epoch": 0.3914911903738719, "grad_norm": 1.2047829627990723, "learning_rate": 0.00013895847912613678, "loss": 1.3227, "step": 2733 }, { "epoch": 0.39163443632717376, "grad_norm": 1.234537959098816, "learning_rate": 0.00013891574302891458, "loss": 1.497, "step": 2734 }, { "epoch": 0.3917776822804756, "grad_norm": 1.2812901735305786, "learning_rate": 0.00013887299855409586, "loss": 1.4334, "step": 2735 }, { "epoch": 0.3919209282337774, "grad_norm": 1.0933613777160645, "learning_rate": 0.00013883024571088257, "loss": 1.3088, "step": 2736 }, { "epoch": 0.39206417418707923, "grad_norm": 1.1715500354766846, "learning_rate": 0.00013878748450847826, "loss": 1.4636, "step": 2737 }, { "epoch": 0.392207420140381, "grad_norm": 1.0626330375671387, "learning_rate": 0.00013874471495608836, "loss": 1.458, "step": 2738 }, { "epoch": 0.39235066609368285, "grad_norm": 1.1760331392288208, "learning_rate": 0.00013870193706292012, "loss": 1.6347, "step": 2739 }, { "epoch": 0.3924939120469847, "grad_norm": 1.1869091987609863, "learning_rate": 0.00013865915083818256, "loss": 1.5342, "step": 2740 }, { "epoch": 0.3926371580002865, "grad_norm": 1.1295634508132935, "learning_rate": 0.0001386163562910865, "loss": 1.563, "step": 2741 }, { "epoch": 0.3927804039535883, "grad_norm": 1.0127500295639038, "learning_rate": 0.00013857355343084452, "loss": 1.55, "step": 2742 }, { "epoch": 0.3929236499068901, "grad_norm": 1.2653014659881592, "learning_rate": 0.00013853074226667102, "loss": 1.3498, "step": 2743 }, { "epoch": 0.39306689586019194, "grad_norm": 1.2269583940505981, "learning_rate": 0.00013848792280778222, "loss": 1.5557, "step": 2744 }, { "epoch": 0.3932101418134938, "grad_norm": 1.3298572301864624, "learning_rate": 0.0001384450950633961, "loss": 1.527, "step": 2745 }, { "epoch": 0.39335338776679557, "grad_norm": 1.0951743125915527, "learning_rate": 0.00013840225904273234, "loss": 1.5201, "step": 2746 }, { "epoch": 0.3934966337200974, "grad_norm": 1.1128827333450317, "learning_rate": 0.00013835941475501251, "loss": 1.381, "step": 2747 }, { "epoch": 0.39363987967339925, "grad_norm": 1.1959412097930908, "learning_rate": 0.00013831656220945993, "loss": 1.3947, "step": 2748 }, { "epoch": 0.39378312562670104, "grad_norm": 1.0881004333496094, "learning_rate": 0.0001382737014152997, "loss": 1.5294, "step": 2749 }, { "epoch": 0.3939263715800029, "grad_norm": 1.0680370330810547, "learning_rate": 0.00013823083238175872, "loss": 1.3959, "step": 2750 }, { "epoch": 0.39406961753330466, "grad_norm": 1.1935373544692993, "learning_rate": 0.00013818795511806554, "loss": 1.5015, "step": 2751 }, { "epoch": 0.3942128634866065, "grad_norm": 1.1426711082458496, "learning_rate": 0.00013814506963345067, "loss": 1.3803, "step": 2752 }, { "epoch": 0.39435610943990834, "grad_norm": 1.140364646911621, "learning_rate": 0.00013810217593714623, "loss": 1.6693, "step": 2753 }, { "epoch": 0.3944993553932101, "grad_norm": 0.9717327952384949, "learning_rate": 0.00013805927403838622, "loss": 1.4261, "step": 2754 }, { "epoch": 0.39464260134651197, "grad_norm": 1.185996174812317, "learning_rate": 0.00013801636394640627, "loss": 1.3365, "step": 2755 }, { "epoch": 0.3947858472998138, "grad_norm": 1.1653282642364502, "learning_rate": 0.00013797344567044396, "loss": 1.4204, "step": 2756 }, { "epoch": 0.3949290932531156, "grad_norm": 1.1007578372955322, "learning_rate": 0.00013793051921973852, "loss": 1.5159, "step": 2757 }, { "epoch": 0.39507233920641743, "grad_norm": 1.0245144367218018, "learning_rate": 0.00013788758460353087, "loss": 1.5361, "step": 2758 }, { "epoch": 0.3952155851597192, "grad_norm": 1.1577740907669067, "learning_rate": 0.00013784464183106389, "loss": 1.4572, "step": 2759 }, { "epoch": 0.39535883111302106, "grad_norm": 0.9496899247169495, "learning_rate": 0.00013780169091158197, "loss": 1.3499, "step": 2760 }, { "epoch": 0.3955020770663229, "grad_norm": 1.0334111452102661, "learning_rate": 0.0001377587318543315, "loss": 1.3178, "step": 2761 }, { "epoch": 0.3956453230196247, "grad_norm": 1.073441982269287, "learning_rate": 0.0001377157646685604, "loss": 1.5489, "step": 2762 }, { "epoch": 0.3957885689729265, "grad_norm": 1.1558798551559448, "learning_rate": 0.00013767278936351854, "loss": 1.5191, "step": 2763 }, { "epoch": 0.3959318149262283, "grad_norm": 1.1839429140090942, "learning_rate": 0.0001376298059484573, "loss": 1.4471, "step": 2764 }, { "epoch": 0.39607506087953015, "grad_norm": 0.9303610324859619, "learning_rate": 0.00013758681443263012, "loss": 1.52, "step": 2765 }, { "epoch": 0.396218306832832, "grad_norm": 1.0678328275680542, "learning_rate": 0.00013754381482529188, "loss": 1.5048, "step": 2766 }, { "epoch": 0.3963615527861338, "grad_norm": 1.188411831855774, "learning_rate": 0.0001375008071356994, "loss": 1.6107, "step": 2767 }, { "epoch": 0.3965047987394356, "grad_norm": 0.9718554019927979, "learning_rate": 0.0001374577913731111, "loss": 1.4725, "step": 2768 }, { "epoch": 0.39664804469273746, "grad_norm": 0.9430140852928162, "learning_rate": 0.0001374147675467873, "loss": 1.347, "step": 2769 }, { "epoch": 0.39679129064603924, "grad_norm": 1.3940454721450806, "learning_rate": 0.00013737173566598991, "loss": 1.4218, "step": 2770 }, { "epoch": 0.3969345365993411, "grad_norm": 1.2685039043426514, "learning_rate": 0.00013732869573998262, "loss": 1.4834, "step": 2771 }, { "epoch": 0.39707778255264287, "grad_norm": 0.9639720320701599, "learning_rate": 0.00013728564777803088, "loss": 1.4831, "step": 2772 }, { "epoch": 0.3972210285059447, "grad_norm": 1.4262007474899292, "learning_rate": 0.00013724259178940184, "loss": 1.6025, "step": 2773 }, { "epoch": 0.39736427445924655, "grad_norm": 1.1385854482650757, "learning_rate": 0.00013719952778336442, "loss": 1.4198, "step": 2774 }, { "epoch": 0.39750752041254833, "grad_norm": 1.6640490293502808, "learning_rate": 0.0001371564557691892, "loss": 1.4862, "step": 2775 }, { "epoch": 0.3976507663658502, "grad_norm": 1.1516785621643066, "learning_rate": 0.00013711337575614857, "loss": 1.4584, "step": 2776 }, { "epoch": 0.39779401231915196, "grad_norm": 1.0535268783569336, "learning_rate": 0.0001370702877535165, "loss": 1.3723, "step": 2777 }, { "epoch": 0.3979372582724538, "grad_norm": 1.0283282995224, "learning_rate": 0.00013702719177056884, "loss": 1.7369, "step": 2778 }, { "epoch": 0.39808050422575564, "grad_norm": 1.1601848602294922, "learning_rate": 0.0001369840878165831, "loss": 1.4379, "step": 2779 }, { "epoch": 0.3982237501790574, "grad_norm": 1.1825915575027466, "learning_rate": 0.00013694097590083844, "loss": 1.4749, "step": 2780 }, { "epoch": 0.39836699613235926, "grad_norm": 1.1129486560821533, "learning_rate": 0.00013689785603261583, "loss": 1.5742, "step": 2781 }, { "epoch": 0.3985102420856611, "grad_norm": 1.1634058952331543, "learning_rate": 0.00013685472822119786, "loss": 1.4949, "step": 2782 }, { "epoch": 0.3986534880389629, "grad_norm": 1.1009522676467896, "learning_rate": 0.00013681159247586896, "loss": 1.5692, "step": 2783 }, { "epoch": 0.39879673399226473, "grad_norm": 1.083465337753296, "learning_rate": 0.00013676844880591512, "loss": 1.6274, "step": 2784 }, { "epoch": 0.3989399799455665, "grad_norm": 0.9692406058311462, "learning_rate": 0.00013672529722062415, "loss": 1.568, "step": 2785 }, { "epoch": 0.39908322589886835, "grad_norm": 1.1225932836532593, "learning_rate": 0.0001366821377292855, "loss": 1.5064, "step": 2786 }, { "epoch": 0.3992264718521702, "grad_norm": 1.116595983505249, "learning_rate": 0.0001366389703411903, "loss": 1.472, "step": 2787 }, { "epoch": 0.399369717805472, "grad_norm": 1.1441022157669067, "learning_rate": 0.0001365957950656315, "loss": 1.6116, "step": 2788 }, { "epoch": 0.3995129637587738, "grad_norm": 1.054233193397522, "learning_rate": 0.0001365526119119036, "loss": 1.5448, "step": 2789 }, { "epoch": 0.39965620971207566, "grad_norm": 0.8752365708351135, "learning_rate": 0.00013650942088930295, "loss": 1.4324, "step": 2790 }, { "epoch": 0.39979945566537745, "grad_norm": 1.4697915315628052, "learning_rate": 0.00013646622200712738, "loss": 1.3729, "step": 2791 }, { "epoch": 0.3999427016186793, "grad_norm": 1.1016391515731812, "learning_rate": 0.00013642301527467664, "loss": 1.2313, "step": 2792 }, { "epoch": 0.40008594757198107, "grad_norm": 1.0731985569000244, "learning_rate": 0.00013637980070125205, "loss": 1.5159, "step": 2793 }, { "epoch": 0.4002291935252829, "grad_norm": 1.120406150817871, "learning_rate": 0.0001363365782961566, "loss": 1.3374, "step": 2794 }, { "epoch": 0.40037243947858475, "grad_norm": 1.0571527481079102, "learning_rate": 0.00013629334806869507, "loss": 1.5812, "step": 2795 }, { "epoch": 0.40051568543188654, "grad_norm": 1.041649580001831, "learning_rate": 0.0001362501100281738, "loss": 1.3901, "step": 2796 }, { "epoch": 0.4006589313851884, "grad_norm": 1.134366750717163, "learning_rate": 0.0001362068641839009, "loss": 1.6005, "step": 2797 }, { "epoch": 0.40080217733849016, "grad_norm": 1.573654055595398, "learning_rate": 0.0001361636105451861, "loss": 1.3569, "step": 2798 }, { "epoch": 0.400945423291792, "grad_norm": 1.1163673400878906, "learning_rate": 0.0001361203491213409, "loss": 1.5145, "step": 2799 }, { "epoch": 0.40108866924509384, "grad_norm": 1.1613695621490479, "learning_rate": 0.00013607707992167834, "loss": 1.3559, "step": 2800 }, { "epoch": 0.40123191519839563, "grad_norm": 1.2973524332046509, "learning_rate": 0.00013603380295551328, "loss": 1.4852, "step": 2801 }, { "epoch": 0.40137516115169747, "grad_norm": 1.0951015949249268, "learning_rate": 0.0001359905182321621, "loss": 1.2779, "step": 2802 }, { "epoch": 0.4015184071049993, "grad_norm": 1.144309163093567, "learning_rate": 0.00013594722576094296, "loss": 1.4245, "step": 2803 }, { "epoch": 0.4016616530583011, "grad_norm": 1.2852388620376587, "learning_rate": 0.00013590392555117573, "loss": 1.5809, "step": 2804 }, { "epoch": 0.40180489901160293, "grad_norm": 1.2533011436462402, "learning_rate": 0.00013586061761218176, "loss": 1.4776, "step": 2805 }, { "epoch": 0.4019481449649047, "grad_norm": 1.0780951976776123, "learning_rate": 0.00013581730195328425, "loss": 1.5449, "step": 2806 }, { "epoch": 0.40209139091820656, "grad_norm": 1.12636137008667, "learning_rate": 0.00013577397858380798, "loss": 1.6181, "step": 2807 }, { "epoch": 0.4022346368715084, "grad_norm": 1.159757375717163, "learning_rate": 0.0001357306475130794, "loss": 1.4516, "step": 2808 }, { "epoch": 0.4023778828248102, "grad_norm": 1.234001636505127, "learning_rate": 0.00013568730875042654, "loss": 1.4359, "step": 2809 }, { "epoch": 0.402521128778112, "grad_norm": 1.2021113634109497, "learning_rate": 0.0001356439623051793, "loss": 1.5114, "step": 2810 }, { "epoch": 0.4026643747314138, "grad_norm": 1.043145775794983, "learning_rate": 0.000135600608186669, "loss": 1.5188, "step": 2811 }, { "epoch": 0.40280762068471565, "grad_norm": 1.0737450122833252, "learning_rate": 0.00013555724640422874, "loss": 1.5434, "step": 2812 }, { "epoch": 0.4029508666380175, "grad_norm": 1.1912723779678345, "learning_rate": 0.00013551387696719326, "loss": 1.5644, "step": 2813 }, { "epoch": 0.4030941125913193, "grad_norm": 0.937789261341095, "learning_rate": 0.00013547049988489883, "loss": 1.7395, "step": 2814 }, { "epoch": 0.4032373585446211, "grad_norm": 1.2506660223007202, "learning_rate": 0.0001354271151666836, "loss": 1.5189, "step": 2815 }, { "epoch": 0.40338060449792296, "grad_norm": 1.3174413442611694, "learning_rate": 0.00013538372282188713, "loss": 1.5846, "step": 2816 }, { "epoch": 0.40352385045122474, "grad_norm": 1.0043715238571167, "learning_rate": 0.0001353403228598508, "loss": 1.3538, "step": 2817 }, { "epoch": 0.4036670964045266, "grad_norm": 1.2512023448944092, "learning_rate": 0.00013529691528991743, "loss": 1.5064, "step": 2818 }, { "epoch": 0.40381034235782837, "grad_norm": 1.122894048690796, "learning_rate": 0.00013525350012143168, "loss": 1.4153, "step": 2819 }, { "epoch": 0.4039535883111302, "grad_norm": 1.2257254123687744, "learning_rate": 0.00013521007736373974, "loss": 1.479, "step": 2820 }, { "epoch": 0.40409683426443205, "grad_norm": 1.211525559425354, "learning_rate": 0.00013516664702618948, "loss": 1.4493, "step": 2821 }, { "epoch": 0.40424008021773383, "grad_norm": 0.9623228907585144, "learning_rate": 0.0001351232091181303, "loss": 1.4962, "step": 2822 }, { "epoch": 0.4043833261710357, "grad_norm": 1.1287436485290527, "learning_rate": 0.0001350797636489134, "loss": 1.4036, "step": 2823 }, { "epoch": 0.4045265721243375, "grad_norm": 1.3107287883758545, "learning_rate": 0.0001350363106278915, "loss": 1.5864, "step": 2824 }, { "epoch": 0.4046698180776393, "grad_norm": 0.955753743648529, "learning_rate": 0.00013499285006441888, "loss": 1.3797, "step": 2825 }, { "epoch": 0.40481306403094114, "grad_norm": 0.9739630818367004, "learning_rate": 0.00013494938196785162, "loss": 1.4027, "step": 2826 }, { "epoch": 0.4049563099842429, "grad_norm": 1.1018555164337158, "learning_rate": 0.00013490590634754728, "loss": 1.4327, "step": 2827 }, { "epoch": 0.40509955593754476, "grad_norm": 1.1858118772506714, "learning_rate": 0.00013486242321286506, "loss": 1.473, "step": 2828 }, { "epoch": 0.4052428018908466, "grad_norm": 1.1405820846557617, "learning_rate": 0.0001348189325731659, "loss": 1.3526, "step": 2829 }, { "epoch": 0.4053860478441484, "grad_norm": 0.9196799397468567, "learning_rate": 0.0001347754344378121, "loss": 1.5678, "step": 2830 }, { "epoch": 0.40552929379745023, "grad_norm": 1.2381303310394287, "learning_rate": 0.0001347319288161679, "loss": 1.5034, "step": 2831 }, { "epoch": 0.405672539750752, "grad_norm": 1.251454472541809, "learning_rate": 0.00013468841571759888, "loss": 1.2357, "step": 2832 }, { "epoch": 0.40581578570405386, "grad_norm": 1.2669380903244019, "learning_rate": 0.00013464489515147238, "loss": 1.3635, "step": 2833 }, { "epoch": 0.4059590316573557, "grad_norm": 1.0958219766616821, "learning_rate": 0.00013460136712715724, "loss": 1.6209, "step": 2834 }, { "epoch": 0.4061022776106575, "grad_norm": 1.2327121496200562, "learning_rate": 0.00013455783165402404, "loss": 1.4954, "step": 2835 }, { "epoch": 0.4062455235639593, "grad_norm": 1.2271267175674438, "learning_rate": 0.00013451428874144484, "loss": 1.37, "step": 2836 }, { "epoch": 0.40638876951726116, "grad_norm": 1.0246933698654175, "learning_rate": 0.0001344707383987934, "loss": 1.4175, "step": 2837 }, { "epoch": 0.40653201547056295, "grad_norm": 1.249424934387207, "learning_rate": 0.000134427180635445, "loss": 1.338, "step": 2838 }, { "epoch": 0.4066752614238648, "grad_norm": 1.0454884767532349, "learning_rate": 0.0001343836154607765, "loss": 1.4687, "step": 2839 }, { "epoch": 0.40681850737716657, "grad_norm": 1.1392124891281128, "learning_rate": 0.0001343400428841665, "loss": 1.6099, "step": 2840 }, { "epoch": 0.4069617533304684, "grad_norm": 1.1528297662734985, "learning_rate": 0.00013429646291499502, "loss": 1.4681, "step": 2841 }, { "epoch": 0.40710499928377025, "grad_norm": 1.300648808479309, "learning_rate": 0.00013425287556264383, "loss": 1.41, "step": 2842 }, { "epoch": 0.40724824523707204, "grad_norm": 1.127364993095398, "learning_rate": 0.00013420928083649608, "loss": 1.5362, "step": 2843 }, { "epoch": 0.4073914911903739, "grad_norm": 1.1325756311416626, "learning_rate": 0.0001341656787459368, "loss": 1.5431, "step": 2844 }, { "epoch": 0.4075347371436757, "grad_norm": 0.8674351572990417, "learning_rate": 0.0001341220693003523, "loss": 1.424, "step": 2845 }, { "epoch": 0.4076779830969775, "grad_norm": 1.3473536968231201, "learning_rate": 0.00013407845250913066, "loss": 1.3906, "step": 2846 }, { "epoch": 0.40782122905027934, "grad_norm": 1.1625980138778687, "learning_rate": 0.00013403482838166155, "loss": 1.5181, "step": 2847 }, { "epoch": 0.40796447500358113, "grad_norm": 1.3195536136627197, "learning_rate": 0.00013399119692733612, "loss": 1.5212, "step": 2848 }, { "epoch": 0.40810772095688297, "grad_norm": 0.977924644947052, "learning_rate": 0.00013394755815554713, "loss": 1.5501, "step": 2849 }, { "epoch": 0.4082509669101848, "grad_norm": 1.2794781923294067, "learning_rate": 0.0001339039120756889, "loss": 1.4029, "step": 2850 }, { "epoch": 0.4083942128634866, "grad_norm": 1.2609822750091553, "learning_rate": 0.00013386025869715744, "loss": 1.6376, "step": 2851 }, { "epoch": 0.40853745881678843, "grad_norm": 1.1048805713653564, "learning_rate": 0.00013381659802935017, "loss": 1.3858, "step": 2852 }, { "epoch": 0.4086807047700902, "grad_norm": 1.3309123516082764, "learning_rate": 0.0001337729300816662, "loss": 1.4825, "step": 2853 }, { "epoch": 0.40882395072339206, "grad_norm": 1.2099355459213257, "learning_rate": 0.0001337292548635061, "loss": 1.2859, "step": 2854 }, { "epoch": 0.4089671966766939, "grad_norm": 1.0338795185089111, "learning_rate": 0.00013368557238427212, "loss": 1.4519, "step": 2855 }, { "epoch": 0.4091104426299957, "grad_norm": 1.0662775039672852, "learning_rate": 0.000133641882653368, "loss": 1.5718, "step": 2856 }, { "epoch": 0.4092536885832975, "grad_norm": 1.3427984714508057, "learning_rate": 0.00013359818568019904, "loss": 1.4128, "step": 2857 }, { "epoch": 0.40939693453659937, "grad_norm": 1.0407174825668335, "learning_rate": 0.00013355448147417214, "loss": 1.4963, "step": 2858 }, { "epoch": 0.40954018048990115, "grad_norm": 1.2363427877426147, "learning_rate": 0.0001335107700446957, "loss": 1.5084, "step": 2859 }, { "epoch": 0.409683426443203, "grad_norm": 0.9856411218643188, "learning_rate": 0.00013346705140117977, "loss": 1.5685, "step": 2860 }, { "epoch": 0.4098266723965048, "grad_norm": 1.1373803615570068, "learning_rate": 0.0001334233255530358, "loss": 1.5162, "step": 2861 }, { "epoch": 0.4099699183498066, "grad_norm": 1.0047681331634521, "learning_rate": 0.00013337959250967697, "loss": 1.3596, "step": 2862 }, { "epoch": 0.41011316430310846, "grad_norm": 1.3044847249984741, "learning_rate": 0.0001333358522805179, "loss": 1.4707, "step": 2863 }, { "epoch": 0.41025641025641024, "grad_norm": 1.3690197467803955, "learning_rate": 0.00013329210487497475, "loss": 1.4703, "step": 2864 }, { "epoch": 0.4103996562097121, "grad_norm": 0.9814923405647278, "learning_rate": 0.0001332483503024653, "loss": 1.5002, "step": 2865 }, { "epoch": 0.41054290216301387, "grad_norm": 1.3000876903533936, "learning_rate": 0.00013320458857240877, "loss": 1.542, "step": 2866 }, { "epoch": 0.4106861481163157, "grad_norm": 1.0864509344100952, "learning_rate": 0.00013316081969422602, "loss": 1.383, "step": 2867 }, { "epoch": 0.41082939406961755, "grad_norm": 0.9772033095359802, "learning_rate": 0.00013311704367733937, "loss": 1.51, "step": 2868 }, { "epoch": 0.41097264002291933, "grad_norm": 1.0879658460617065, "learning_rate": 0.00013307326053117277, "loss": 1.3992, "step": 2869 }, { "epoch": 0.4111158859762212, "grad_norm": 1.232767105102539, "learning_rate": 0.00013302947026515158, "loss": 1.6992, "step": 2870 }, { "epoch": 0.411259131929523, "grad_norm": 0.9638648629188538, "learning_rate": 0.0001329856728887028, "loss": 1.5831, "step": 2871 }, { "epoch": 0.4114023778828248, "grad_norm": 1.0368996858596802, "learning_rate": 0.00013294186841125493, "loss": 1.559, "step": 2872 }, { "epoch": 0.41154562383612664, "grad_norm": 1.0689702033996582, "learning_rate": 0.00013289805684223798, "loss": 1.494, "step": 2873 }, { "epoch": 0.4116888697894284, "grad_norm": 0.9811191558837891, "learning_rate": 0.0001328542381910835, "loss": 1.4314, "step": 2874 }, { "epoch": 0.41183211574273026, "grad_norm": 1.2971858978271484, "learning_rate": 0.00013281041246722452, "loss": 1.3218, "step": 2875 }, { "epoch": 0.4119753616960321, "grad_norm": 1.0183264017105103, "learning_rate": 0.00013276657968009568, "loss": 1.4108, "step": 2876 }, { "epoch": 0.4121186076493339, "grad_norm": 1.0744152069091797, "learning_rate": 0.0001327227398391331, "loss": 1.5798, "step": 2877 }, { "epoch": 0.41226185360263573, "grad_norm": 1.0109795331954956, "learning_rate": 0.00013267889295377438, "loss": 1.3757, "step": 2878 }, { "epoch": 0.41240509955593757, "grad_norm": 1.0059654712677002, "learning_rate": 0.00013263503903345868, "loss": 1.4304, "step": 2879 }, { "epoch": 0.41254834550923936, "grad_norm": 0.9491704106330872, "learning_rate": 0.0001325911780876267, "loss": 1.3791, "step": 2880 }, { "epoch": 0.4126915914625412, "grad_norm": 1.0815435647964478, "learning_rate": 0.00013254731012572052, "loss": 1.4149, "step": 2881 }, { "epoch": 0.412834837415843, "grad_norm": 0.9652405381202698, "learning_rate": 0.00013250343515718393, "loss": 1.3955, "step": 2882 }, { "epoch": 0.4129780833691448, "grad_norm": 1.0151206254959106, "learning_rate": 0.0001324595531914621, "loss": 1.3202, "step": 2883 }, { "epoch": 0.41312132932244666, "grad_norm": 1.0841914415359497, "learning_rate": 0.0001324156642380017, "loss": 1.3594, "step": 2884 }, { "epoch": 0.41326457527574845, "grad_norm": 1.0119885206222534, "learning_rate": 0.00013237176830625095, "loss": 1.4671, "step": 2885 }, { "epoch": 0.4134078212290503, "grad_norm": 1.1591053009033203, "learning_rate": 0.00013232786540565955, "loss": 1.4805, "step": 2886 }, { "epoch": 0.4135510671823521, "grad_norm": 1.3846744298934937, "learning_rate": 0.00013228395554567874, "loss": 1.3099, "step": 2887 }, { "epoch": 0.4136943131356539, "grad_norm": 1.0866940021514893, "learning_rate": 0.00013224003873576123, "loss": 1.2355, "step": 2888 }, { "epoch": 0.41383755908895575, "grad_norm": 1.1971701383590698, "learning_rate": 0.00013219611498536114, "loss": 1.4966, "step": 2889 }, { "epoch": 0.41398080504225754, "grad_norm": 0.9875962734222412, "learning_rate": 0.00013215218430393425, "loss": 1.4, "step": 2890 }, { "epoch": 0.4141240509955594, "grad_norm": 1.0728237628936768, "learning_rate": 0.0001321082467009377, "loss": 1.4172, "step": 2891 }, { "epoch": 0.4142672969488612, "grad_norm": 1.2615853548049927, "learning_rate": 0.00013206430218583025, "loss": 1.3558, "step": 2892 }, { "epoch": 0.414410542902163, "grad_norm": 1.1126292943954468, "learning_rate": 0.00013202035076807195, "loss": 1.5324, "step": 2893 }, { "epoch": 0.41455378885546484, "grad_norm": 1.0649443864822388, "learning_rate": 0.00013197639245712454, "loss": 1.4024, "step": 2894 }, { "epoch": 0.41469703480876663, "grad_norm": 1.183631420135498, "learning_rate": 0.0001319324272624511, "loss": 1.4497, "step": 2895 }, { "epoch": 0.41484028076206847, "grad_norm": 1.069616436958313, "learning_rate": 0.00013188845519351632, "loss": 1.3838, "step": 2896 }, { "epoch": 0.4149835267153703, "grad_norm": 1.0213828086853027, "learning_rate": 0.0001318444762597862, "loss": 1.2517, "step": 2897 }, { "epoch": 0.4151267726686721, "grad_norm": 1.1460169553756714, "learning_rate": 0.0001318004904707284, "loss": 1.3841, "step": 2898 }, { "epoch": 0.41527001862197394, "grad_norm": 1.0720977783203125, "learning_rate": 0.00013175649783581195, "loss": 1.7008, "step": 2899 }, { "epoch": 0.4154132645752758, "grad_norm": 1.2382010221481323, "learning_rate": 0.00013171249836450735, "loss": 1.6062, "step": 2900 }, { "epoch": 0.41555651052857756, "grad_norm": 1.060104489326477, "learning_rate": 0.00013166849206628663, "loss": 1.586, "step": 2901 }, { "epoch": 0.4156997564818794, "grad_norm": 1.1118627786636353, "learning_rate": 0.00013162447895062322, "loss": 1.5309, "step": 2902 }, { "epoch": 0.4158430024351812, "grad_norm": 0.8764147758483887, "learning_rate": 0.0001315804590269921, "loss": 1.5577, "step": 2903 }, { "epoch": 0.415986248388483, "grad_norm": 1.3989410400390625, "learning_rate": 0.00013153643230486963, "loss": 1.6303, "step": 2904 }, { "epoch": 0.41612949434178487, "grad_norm": 1.2069836854934692, "learning_rate": 0.0001314923987937337, "loss": 1.7139, "step": 2905 }, { "epoch": 0.41627274029508665, "grad_norm": 1.0896302461624146, "learning_rate": 0.0001314483585030636, "loss": 1.6546, "step": 2906 }, { "epoch": 0.4164159862483885, "grad_norm": 1.156543254852295, "learning_rate": 0.00013140431144234018, "loss": 1.4581, "step": 2907 }, { "epoch": 0.4165592322016903, "grad_norm": 1.2816578149795532, "learning_rate": 0.00013136025762104563, "loss": 1.5644, "step": 2908 }, { "epoch": 0.4167024781549921, "grad_norm": 0.8713088035583496, "learning_rate": 0.00013131619704866363, "loss": 1.5475, "step": 2909 }, { "epoch": 0.41684572410829396, "grad_norm": 0.9993542432785034, "learning_rate": 0.0001312721297346794, "loss": 1.4377, "step": 2910 }, { "epoch": 0.41698897006159574, "grad_norm": 1.2636206150054932, "learning_rate": 0.00013122805568857948, "loss": 1.4936, "step": 2911 }, { "epoch": 0.4171322160148976, "grad_norm": 1.3151670694351196, "learning_rate": 0.00013118397491985198, "loss": 1.4931, "step": 2912 }, { "epoch": 0.4172754619681994, "grad_norm": 1.1698790788650513, "learning_rate": 0.00013113988743798628, "loss": 1.5435, "step": 2913 }, { "epoch": 0.4174187079215012, "grad_norm": 0.9804201722145081, "learning_rate": 0.0001310957932524735, "loss": 1.4183, "step": 2914 }, { "epoch": 0.41756195387480305, "grad_norm": 1.2931900024414062, "learning_rate": 0.00013105169237280587, "loss": 1.5388, "step": 2915 }, { "epoch": 0.41770519982810483, "grad_norm": 1.1844921112060547, "learning_rate": 0.00013100758480847732, "loss": 1.4067, "step": 2916 }, { "epoch": 0.4178484457814067, "grad_norm": 1.134605884552002, "learning_rate": 0.00013096347056898308, "loss": 1.5314, "step": 2917 }, { "epoch": 0.4179916917347085, "grad_norm": 0.9807714223861694, "learning_rate": 0.00013091934966381983, "loss": 1.3936, "step": 2918 }, { "epoch": 0.4181349376880103, "grad_norm": 1.1086297035217285, "learning_rate": 0.00013087522210248576, "loss": 1.4978, "step": 2919 }, { "epoch": 0.41827818364131214, "grad_norm": 1.0083820819854736, "learning_rate": 0.00013083108789448038, "loss": 1.4995, "step": 2920 }, { "epoch": 0.4184214295946139, "grad_norm": 1.4726800918579102, "learning_rate": 0.00013078694704930473, "loss": 1.6217, "step": 2921 }, { "epoch": 0.41856467554791577, "grad_norm": 1.0786010026931763, "learning_rate": 0.00013074279957646126, "loss": 1.4239, "step": 2922 }, { "epoch": 0.4187079215012176, "grad_norm": 1.0577421188354492, "learning_rate": 0.0001306986454854538, "loss": 1.3987, "step": 2923 }, { "epoch": 0.4188511674545194, "grad_norm": 1.080257534980774, "learning_rate": 0.00013065448478578764, "loss": 1.5135, "step": 2924 }, { "epoch": 0.41899441340782123, "grad_norm": 1.2067092657089233, "learning_rate": 0.0001306103174869695, "loss": 1.5282, "step": 2925 }, { "epoch": 0.41913765936112307, "grad_norm": 1.0792328119277954, "learning_rate": 0.0001305661435985075, "loss": 1.3832, "step": 2926 }, { "epoch": 0.41928090531442486, "grad_norm": 0.9527145624160767, "learning_rate": 0.00013052196312991114, "loss": 1.3426, "step": 2927 }, { "epoch": 0.4194241512677267, "grad_norm": 0.9496454000473022, "learning_rate": 0.00013047777609069146, "loss": 1.4468, "step": 2928 }, { "epoch": 0.4195673972210285, "grad_norm": 1.0863112211227417, "learning_rate": 0.00013043358249036077, "loss": 1.4123, "step": 2929 }, { "epoch": 0.4197106431743303, "grad_norm": 1.052915096282959, "learning_rate": 0.00013038938233843287, "loss": 1.5495, "step": 2930 }, { "epoch": 0.41985388912763216, "grad_norm": 1.0582530498504639, "learning_rate": 0.000130345175644423, "loss": 1.4478, "step": 2931 }, { "epoch": 0.41999713508093395, "grad_norm": 1.008997917175293, "learning_rate": 0.00013030096241784772, "loss": 1.4705, "step": 2932 }, { "epoch": 0.4201403810342358, "grad_norm": 1.0599136352539062, "learning_rate": 0.0001302567426682251, "loss": 1.4005, "step": 2933 }, { "epoch": 0.42028362698753763, "grad_norm": 1.0790364742279053, "learning_rate": 0.00013021251640507448, "loss": 1.3247, "step": 2934 }, { "epoch": 0.4204268729408394, "grad_norm": 1.065268874168396, "learning_rate": 0.00013016828363791673, "loss": 1.5966, "step": 2935 }, { "epoch": 0.42057011889414125, "grad_norm": 1.0589488744735718, "learning_rate": 0.00013012404437627407, "loss": 1.5706, "step": 2936 }, { "epoch": 0.42071336484744304, "grad_norm": 1.2438745498657227, "learning_rate": 0.00013007979862967014, "loss": 1.4395, "step": 2937 }, { "epoch": 0.4208566108007449, "grad_norm": 1.17854905128479, "learning_rate": 0.00013003554640762986, "loss": 1.5951, "step": 2938 }, { "epoch": 0.4209998567540467, "grad_norm": 0.9444679617881775, "learning_rate": 0.00012999128771967977, "loss": 1.5462, "step": 2939 }, { "epoch": 0.4211431027073485, "grad_norm": 1.0070197582244873, "learning_rate": 0.00012994702257534756, "loss": 1.6002, "step": 2940 }, { "epoch": 0.42128634866065034, "grad_norm": 1.1389254331588745, "learning_rate": 0.00012990275098416246, "loss": 1.5906, "step": 2941 }, { "epoch": 0.42142959461395213, "grad_norm": 1.1540002822875977, "learning_rate": 0.00012985847295565508, "loss": 1.4798, "step": 2942 }, { "epoch": 0.42157284056725397, "grad_norm": 1.148228406906128, "learning_rate": 0.00012981418849935733, "loss": 1.5471, "step": 2943 }, { "epoch": 0.4217160865205558, "grad_norm": 1.1624430418014526, "learning_rate": 0.0001297698976248026, "loss": 1.2949, "step": 2944 }, { "epoch": 0.4218593324738576, "grad_norm": 1.074743628501892, "learning_rate": 0.00012972560034152562, "loss": 1.4902, "step": 2945 }, { "epoch": 0.42200257842715944, "grad_norm": 1.004320502281189, "learning_rate": 0.00012968129665906242, "loss": 1.4159, "step": 2946 }, { "epoch": 0.4221458243804613, "grad_norm": 1.2791447639465332, "learning_rate": 0.0001296369865869506, "loss": 1.4524, "step": 2947 }, { "epoch": 0.42228907033376306, "grad_norm": 1.2297550439834595, "learning_rate": 0.00012959267013472892, "loss": 1.4057, "step": 2948 }, { "epoch": 0.4224323162870649, "grad_norm": 1.2139707803726196, "learning_rate": 0.00012954834731193775, "loss": 1.4629, "step": 2949 }, { "epoch": 0.4225755622403667, "grad_norm": 0.8826777935028076, "learning_rate": 0.00012950401812811854, "loss": 1.3843, "step": 2950 }, { "epoch": 0.4227188081936685, "grad_norm": 1.01175856590271, "learning_rate": 0.00012945968259281437, "loss": 1.3502, "step": 2951 }, { "epoch": 0.42286205414697037, "grad_norm": 1.467247724533081, "learning_rate": 0.00012941534071556952, "loss": 1.3557, "step": 2952 }, { "epoch": 0.42300530010027215, "grad_norm": 1.1977813243865967, "learning_rate": 0.00012937099250592975, "loss": 1.4897, "step": 2953 }, { "epoch": 0.423148546053574, "grad_norm": 1.0589356422424316, "learning_rate": 0.00012932663797344214, "loss": 1.5279, "step": 2954 }, { "epoch": 0.42329179200687583, "grad_norm": 1.0807514190673828, "learning_rate": 0.00012928227712765504, "loss": 1.4172, "step": 2955 }, { "epoch": 0.4234350379601776, "grad_norm": 1.0285794734954834, "learning_rate": 0.00012923790997811834, "loss": 1.5824, "step": 2956 }, { "epoch": 0.42357828391347946, "grad_norm": 1.0368008613586426, "learning_rate": 0.00012919353653438313, "loss": 1.7461, "step": 2957 }, { "epoch": 0.42372152986678124, "grad_norm": 1.0917025804519653, "learning_rate": 0.00012914915680600195, "loss": 1.3869, "step": 2958 }, { "epoch": 0.4238647758200831, "grad_norm": 1.2777318954467773, "learning_rate": 0.00012910477080252858, "loss": 1.5453, "step": 2959 }, { "epoch": 0.4240080217733849, "grad_norm": 1.106281042098999, "learning_rate": 0.00012906037853351835, "loss": 1.286, "step": 2960 }, { "epoch": 0.4241512677266867, "grad_norm": 1.4309871196746826, "learning_rate": 0.00012901598000852774, "loss": 1.4638, "step": 2961 }, { "epoch": 0.42429451367998855, "grad_norm": 0.9520977735519409, "learning_rate": 0.0001289715752371147, "loss": 1.6378, "step": 2962 }, { "epoch": 0.42443775963329033, "grad_norm": 1.0166503190994263, "learning_rate": 0.00012892716422883838, "loss": 1.6441, "step": 2963 }, { "epoch": 0.4245810055865922, "grad_norm": 0.9065435528755188, "learning_rate": 0.0001288827469932595, "loss": 1.5039, "step": 2964 }, { "epoch": 0.424724251539894, "grad_norm": 0.9011521339416504, "learning_rate": 0.00012883832353993986, "loss": 1.4887, "step": 2965 }, { "epoch": 0.4248674974931958, "grad_norm": 1.3229097127914429, "learning_rate": 0.00012879389387844285, "loss": 1.4829, "step": 2966 }, { "epoch": 0.42501074344649764, "grad_norm": 1.1755478382110596, "learning_rate": 0.00012874945801833306, "loss": 1.2967, "step": 2967 }, { "epoch": 0.4251539893997995, "grad_norm": 1.1517176628112793, "learning_rate": 0.00012870501596917632, "loss": 1.6385, "step": 2968 }, { "epoch": 0.42529723535310127, "grad_norm": 1.0937844514846802, "learning_rate": 0.00012866056774054002, "loss": 1.5154, "step": 2969 }, { "epoch": 0.4254404813064031, "grad_norm": 1.2419525384902954, "learning_rate": 0.0001286161133419927, "loss": 1.5851, "step": 2970 }, { "epoch": 0.4255837272597049, "grad_norm": 1.0323566198349, "learning_rate": 0.00012857165278310436, "loss": 1.672, "step": 2971 }, { "epoch": 0.42572697321300673, "grad_norm": 1.0173742771148682, "learning_rate": 0.00012852718607344617, "loss": 1.4588, "step": 2972 }, { "epoch": 0.4258702191663086, "grad_norm": 1.046380639076233, "learning_rate": 0.0001284827132225908, "loss": 1.3932, "step": 2973 }, { "epoch": 0.42601346511961036, "grad_norm": 0.9603387713432312, "learning_rate": 0.00012843823424011207, "loss": 1.4069, "step": 2974 }, { "epoch": 0.4261567110729122, "grad_norm": 1.0051465034484863, "learning_rate": 0.00012839374913558527, "loss": 1.5149, "step": 2975 }, { "epoch": 0.426299957026214, "grad_norm": 1.2073255777359009, "learning_rate": 0.0001283492579185869, "loss": 1.4158, "step": 2976 }, { "epoch": 0.4264432029795158, "grad_norm": 0.9852157831192017, "learning_rate": 0.00012830476059869482, "loss": 1.4799, "step": 2977 }, { "epoch": 0.42658644893281766, "grad_norm": 1.0413848161697388, "learning_rate": 0.0001282602571854883, "loss": 1.343, "step": 2978 }, { "epoch": 0.42672969488611945, "grad_norm": 1.1822084188461304, "learning_rate": 0.00012821574768854765, "loss": 1.5502, "step": 2979 }, { "epoch": 0.4268729408394213, "grad_norm": 0.9565073251724243, "learning_rate": 0.0001281712321174548, "loss": 1.5571, "step": 2980 }, { "epoch": 0.42701618679272313, "grad_norm": 1.3902720212936401, "learning_rate": 0.00012812671048179276, "loss": 1.4454, "step": 2981 }, { "epoch": 0.4271594327460249, "grad_norm": 1.109741449356079, "learning_rate": 0.00012808218279114603, "loss": 1.5342, "step": 2982 }, { "epoch": 0.42730267869932675, "grad_norm": 1.0324418544769287, "learning_rate": 0.00012803764905510025, "loss": 1.4393, "step": 2983 }, { "epoch": 0.42744592465262854, "grad_norm": 1.1344586610794067, "learning_rate": 0.00012799310928324244, "loss": 1.4596, "step": 2984 }, { "epoch": 0.4275891706059304, "grad_norm": 1.2849620580673218, "learning_rate": 0.00012794856348516095, "loss": 1.376, "step": 2985 }, { "epoch": 0.4277324165592322, "grad_norm": 0.877849280834198, "learning_rate": 0.00012790401167044535, "loss": 1.3614, "step": 2986 }, { "epoch": 0.427875662512534, "grad_norm": 1.3338195085525513, "learning_rate": 0.00012785945384868658, "loss": 1.312, "step": 2987 }, { "epoch": 0.42801890846583585, "grad_norm": 0.9819057583808899, "learning_rate": 0.00012781489002947676, "loss": 1.2258, "step": 2988 }, { "epoch": 0.4281621544191377, "grad_norm": 1.21397864818573, "learning_rate": 0.00012777032022240947, "loss": 1.4147, "step": 2989 }, { "epoch": 0.42830540037243947, "grad_norm": 1.0348050594329834, "learning_rate": 0.00012772574443707942, "loss": 1.43, "step": 2990 }, { "epoch": 0.4284486463257413, "grad_norm": 1.079624891281128, "learning_rate": 0.0001276811626830827, "loss": 1.4671, "step": 2991 }, { "epoch": 0.4285918922790431, "grad_norm": 1.2637993097305298, "learning_rate": 0.00012763657497001668, "loss": 1.3709, "step": 2992 }, { "epoch": 0.42873513823234494, "grad_norm": 1.3295892477035522, "learning_rate": 0.00012759198130747994, "loss": 1.2281, "step": 2993 }, { "epoch": 0.4288783841856468, "grad_norm": 1.0518338680267334, "learning_rate": 0.00012754738170507243, "loss": 1.4146, "step": 2994 }, { "epoch": 0.42902163013894856, "grad_norm": 1.0216201543807983, "learning_rate": 0.00012750277617239533, "loss": 1.4083, "step": 2995 }, { "epoch": 0.4291648760922504, "grad_norm": 1.0603731870651245, "learning_rate": 0.00012745816471905114, "loss": 1.5712, "step": 2996 }, { "epoch": 0.4293081220455522, "grad_norm": 1.243438482284546, "learning_rate": 0.0001274135473546435, "loss": 1.5306, "step": 2997 }, { "epoch": 0.42945136799885403, "grad_norm": 1.1117500066757202, "learning_rate": 0.00012736892408877758, "loss": 1.4488, "step": 2998 }, { "epoch": 0.42959461395215587, "grad_norm": 1.16628897190094, "learning_rate": 0.0001273242949310595, "loss": 1.4502, "step": 2999 }, { "epoch": 0.42973785990545765, "grad_norm": 1.5213935375213623, "learning_rate": 0.00012727965989109692, "loss": 1.311, "step": 3000 }, { "epoch": 0.4298811058587595, "grad_norm": 1.4517474174499512, "learning_rate": 0.00012723501897849866, "loss": 1.5248, "step": 3001 }, { "epoch": 0.43002435181206133, "grad_norm": 1.2523372173309326, "learning_rate": 0.00012719037220287476, "loss": 1.225, "step": 3002 }, { "epoch": 0.4301675977653631, "grad_norm": 1.0150803327560425, "learning_rate": 0.00012714571957383658, "loss": 1.5225, "step": 3003 }, { "epoch": 0.43031084371866496, "grad_norm": 1.1522403955459595, "learning_rate": 0.0001271010611009967, "loss": 1.509, "step": 3004 }, { "epoch": 0.43045408967196674, "grad_norm": 1.2456793785095215, "learning_rate": 0.00012705639679396906, "loss": 1.5249, "step": 3005 }, { "epoch": 0.4305973356252686, "grad_norm": 1.0238828659057617, "learning_rate": 0.00012701172666236869, "loss": 1.5292, "step": 3006 }, { "epoch": 0.4307405815785704, "grad_norm": 1.2697727680206299, "learning_rate": 0.00012696705071581205, "loss": 1.45, "step": 3007 }, { "epoch": 0.4308838275318722, "grad_norm": 1.0821571350097656, "learning_rate": 0.0001269223689639167, "loss": 1.4024, "step": 3008 }, { "epoch": 0.43102707348517405, "grad_norm": 1.0689291954040527, "learning_rate": 0.00012687768141630152, "loss": 1.4816, "step": 3009 }, { "epoch": 0.43117031943847584, "grad_norm": 1.1147712469100952, "learning_rate": 0.00012683298808258665, "loss": 1.5372, "step": 3010 }, { "epoch": 0.4313135653917777, "grad_norm": 1.0224536657333374, "learning_rate": 0.00012678828897239348, "loss": 1.5718, "step": 3011 }, { "epoch": 0.4314568113450795, "grad_norm": 1.2083054780960083, "learning_rate": 0.00012674358409534462, "loss": 1.3513, "step": 3012 }, { "epoch": 0.4316000572983813, "grad_norm": 1.0714867115020752, "learning_rate": 0.00012669887346106385, "loss": 1.5939, "step": 3013 }, { "epoch": 0.43174330325168314, "grad_norm": 1.3239539861679077, "learning_rate": 0.00012665415707917638, "loss": 1.4316, "step": 3014 }, { "epoch": 0.431886549204985, "grad_norm": 1.2914295196533203, "learning_rate": 0.00012660943495930845, "loss": 1.3322, "step": 3015 }, { "epoch": 0.43202979515828677, "grad_norm": 1.2645832300186157, "learning_rate": 0.00012656470711108764, "loss": 1.5821, "step": 3016 }, { "epoch": 0.4321730411115886, "grad_norm": 1.0351508855819702, "learning_rate": 0.00012651997354414278, "loss": 1.4938, "step": 3017 }, { "epoch": 0.4323162870648904, "grad_norm": 1.1938503980636597, "learning_rate": 0.00012647523426810386, "loss": 1.3562, "step": 3018 }, { "epoch": 0.43245953301819223, "grad_norm": 0.9267191290855408, "learning_rate": 0.00012643048929260222, "loss": 1.5197, "step": 3019 }, { "epoch": 0.4326027789714941, "grad_norm": 0.9674130082130432, "learning_rate": 0.00012638573862727023, "loss": 1.3878, "step": 3020 }, { "epoch": 0.43274602492479586, "grad_norm": 1.1477378606796265, "learning_rate": 0.0001263409822817417, "loss": 1.4516, "step": 3021 }, { "epoch": 0.4328892708780977, "grad_norm": 1.2554242610931396, "learning_rate": 0.00012629622026565147, "loss": 1.3433, "step": 3022 }, { "epoch": 0.43303251683139954, "grad_norm": 1.2385050058364868, "learning_rate": 0.0001262514525886358, "loss": 1.5042, "step": 3023 }, { "epoch": 0.4331757627847013, "grad_norm": 1.1519674062728882, "learning_rate": 0.00012620667926033197, "loss": 1.3635, "step": 3024 }, { "epoch": 0.43331900873800316, "grad_norm": 1.0280259847640991, "learning_rate": 0.00012616190029037864, "loss": 1.5123, "step": 3025 }, { "epoch": 0.43346225469130495, "grad_norm": 1.2098954916000366, "learning_rate": 0.00012611711568841558, "loss": 1.5378, "step": 3026 }, { "epoch": 0.4336055006446068, "grad_norm": 1.0682852268218994, "learning_rate": 0.0001260723254640838, "loss": 1.368, "step": 3027 }, { "epoch": 0.43374874659790863, "grad_norm": 1.0201326608657837, "learning_rate": 0.00012602752962702555, "loss": 1.2167, "step": 3028 }, { "epoch": 0.4338919925512104, "grad_norm": 0.9865608215332031, "learning_rate": 0.00012598272818688423, "loss": 1.724, "step": 3029 }, { "epoch": 0.43403523850451226, "grad_norm": 1.0396380424499512, "learning_rate": 0.00012593792115330455, "loss": 1.3534, "step": 3030 }, { "epoch": 0.43417848445781404, "grad_norm": 1.1980247497558594, "learning_rate": 0.00012589310853593224, "loss": 1.4163, "step": 3031 }, { "epoch": 0.4343217304111159, "grad_norm": 1.1176769733428955, "learning_rate": 0.00012584829034441446, "loss": 1.4575, "step": 3032 }, { "epoch": 0.4344649763644177, "grad_norm": 1.1439476013183594, "learning_rate": 0.00012580346658839944, "loss": 1.368, "step": 3033 }, { "epoch": 0.4346082223177195, "grad_norm": 1.2689814567565918, "learning_rate": 0.00012575863727753655, "loss": 1.2865, "step": 3034 }, { "epoch": 0.43475146827102135, "grad_norm": 1.260332465171814, "learning_rate": 0.0001257138024214765, "loss": 1.3918, "step": 3035 }, { "epoch": 0.4348947142243232, "grad_norm": 1.0823941230773926, "learning_rate": 0.0001256689620298711, "loss": 1.5517, "step": 3036 }, { "epoch": 0.43503796017762497, "grad_norm": 1.1936004161834717, "learning_rate": 0.00012562411611237342, "loss": 1.5122, "step": 3037 }, { "epoch": 0.4351812061309268, "grad_norm": 0.9758128523826599, "learning_rate": 0.00012557926467863758, "loss": 1.4986, "step": 3038 }, { "epoch": 0.4353244520842286, "grad_norm": 1.4188203811645508, "learning_rate": 0.0001255344077383191, "loss": 1.471, "step": 3039 }, { "epoch": 0.43546769803753044, "grad_norm": 1.0242336988449097, "learning_rate": 0.00012548954530107448, "loss": 1.4611, "step": 3040 }, { "epoch": 0.4356109439908323, "grad_norm": 1.3733683824539185, "learning_rate": 0.00012544467737656153, "loss": 1.5593, "step": 3041 }, { "epoch": 0.43575418994413406, "grad_norm": 1.0285357236862183, "learning_rate": 0.0001253998039744392, "loss": 1.3345, "step": 3042 }, { "epoch": 0.4358974358974359, "grad_norm": 0.9556021690368652, "learning_rate": 0.00012535492510436762, "loss": 1.4751, "step": 3043 }, { "epoch": 0.43604068185073774, "grad_norm": 1.094390630722046, "learning_rate": 0.0001253100407760081, "loss": 1.4566, "step": 3044 }, { "epoch": 0.43618392780403953, "grad_norm": 1.3010364770889282, "learning_rate": 0.00012526515099902313, "loss": 1.4804, "step": 3045 }, { "epoch": 0.43632717375734137, "grad_norm": 1.2758674621582031, "learning_rate": 0.00012522025578307638, "loss": 1.2753, "step": 3046 }, { "epoch": 0.43647041971064315, "grad_norm": 0.996068000793457, "learning_rate": 0.00012517535513783263, "loss": 1.5083, "step": 3047 }, { "epoch": 0.436613665663945, "grad_norm": 1.0624992847442627, "learning_rate": 0.00012513044907295795, "loss": 1.43, "step": 3048 }, { "epoch": 0.43675691161724683, "grad_norm": 1.090915322303772, "learning_rate": 0.00012508553759811946, "loss": 1.3936, "step": 3049 }, { "epoch": 0.4369001575705486, "grad_norm": 1.141618013381958, "learning_rate": 0.00012504062072298544, "loss": 1.4458, "step": 3050 }, { "epoch": 0.43704340352385046, "grad_norm": 1.0165143013000488, "learning_rate": 0.0001249956984572255, "loss": 1.3128, "step": 3051 }, { "epoch": 0.43718664947715224, "grad_norm": 0.9935486912727356, "learning_rate": 0.0001249507708105102, "loss": 1.4738, "step": 3052 }, { "epoch": 0.4373298954304541, "grad_norm": 1.0991129875183105, "learning_rate": 0.00012490583779251142, "loss": 1.547, "step": 3053 }, { "epoch": 0.4374731413837559, "grad_norm": 0.9572281241416931, "learning_rate": 0.00012486089941290206, "loss": 1.4446, "step": 3054 }, { "epoch": 0.4376163873370577, "grad_norm": 1.2859573364257812, "learning_rate": 0.00012481595568135628, "loss": 1.3339, "step": 3055 }, { "epoch": 0.43775963329035955, "grad_norm": 1.1983587741851807, "learning_rate": 0.00012477100660754933, "loss": 1.3494, "step": 3056 }, { "epoch": 0.4379028792436614, "grad_norm": 1.1122018098831177, "learning_rate": 0.00012472605220115765, "loss": 1.5065, "step": 3057 }, { "epoch": 0.4380461251969632, "grad_norm": 1.138338327407837, "learning_rate": 0.0001246810924718588, "loss": 1.4528, "step": 3058 }, { "epoch": 0.438189371150265, "grad_norm": 1.239978313446045, "learning_rate": 0.00012463612742933148, "loss": 1.4125, "step": 3059 }, { "epoch": 0.4383326171035668, "grad_norm": 1.0537867546081543, "learning_rate": 0.0001245911570832556, "loss": 1.2958, "step": 3060 }, { "epoch": 0.43847586305686864, "grad_norm": 0.9920015335083008, "learning_rate": 0.00012454618144331213, "loss": 1.3586, "step": 3061 }, { "epoch": 0.4386191090101705, "grad_norm": 1.1013517379760742, "learning_rate": 0.00012450120051918324, "loss": 1.468, "step": 3062 }, { "epoch": 0.43876235496347227, "grad_norm": 0.9714090824127197, "learning_rate": 0.00012445621432055214, "loss": 1.4832, "step": 3063 }, { "epoch": 0.4389056009167741, "grad_norm": 0.9250453114509583, "learning_rate": 0.00012441122285710335, "loss": 1.5898, "step": 3064 }, { "epoch": 0.4390488468700759, "grad_norm": 1.21511709690094, "learning_rate": 0.00012436622613852234, "loss": 1.391, "step": 3065 }, { "epoch": 0.43919209282337773, "grad_norm": 1.0050164461135864, "learning_rate": 0.0001243212241744958, "loss": 1.4469, "step": 3066 }, { "epoch": 0.4393353387766796, "grad_norm": 1.2763065099716187, "learning_rate": 0.00012427621697471157, "loss": 1.3759, "step": 3067 }, { "epoch": 0.43947858472998136, "grad_norm": 1.1281070709228516, "learning_rate": 0.00012423120454885857, "loss": 1.4813, "step": 3068 }, { "epoch": 0.4396218306832832, "grad_norm": 1.0350903272628784, "learning_rate": 0.00012418618690662685, "loss": 1.3968, "step": 3069 }, { "epoch": 0.43976507663658504, "grad_norm": 1.120147466659546, "learning_rate": 0.00012414116405770758, "loss": 1.3834, "step": 3070 }, { "epoch": 0.4399083225898868, "grad_norm": 0.9993228912353516, "learning_rate": 0.00012409613601179316, "loss": 1.5349, "step": 3071 }, { "epoch": 0.44005156854318866, "grad_norm": 1.1676287651062012, "learning_rate": 0.00012405110277857685, "loss": 1.4776, "step": 3072 }, { "epoch": 0.44019481449649045, "grad_norm": 1.1972886323928833, "learning_rate": 0.00012400606436775336, "loss": 1.4927, "step": 3073 }, { "epoch": 0.4403380604497923, "grad_norm": 1.2024924755096436, "learning_rate": 0.00012396102078901823, "loss": 1.5038, "step": 3074 }, { "epoch": 0.44048130640309413, "grad_norm": 0.9534226655960083, "learning_rate": 0.00012391597205206828, "loss": 1.2454, "step": 3075 }, { "epoch": 0.4406245523563959, "grad_norm": 1.0901786088943481, "learning_rate": 0.00012387091816660136, "loss": 1.4556, "step": 3076 }, { "epoch": 0.44076779830969776, "grad_norm": 1.1119468212127686, "learning_rate": 0.0001238258591423165, "loss": 1.4172, "step": 3077 }, { "epoch": 0.4409110442629996, "grad_norm": 1.366089105606079, "learning_rate": 0.00012378079498891377, "loss": 1.4831, "step": 3078 }, { "epoch": 0.4410542902163014, "grad_norm": 1.2530357837677002, "learning_rate": 0.00012373572571609432, "loss": 1.2852, "step": 3079 }, { "epoch": 0.4411975361696032, "grad_norm": 1.0945103168487549, "learning_rate": 0.00012369065133356052, "loss": 1.4229, "step": 3080 }, { "epoch": 0.441340782122905, "grad_norm": 1.0497305393218994, "learning_rate": 0.00012364557185101576, "loss": 1.3797, "step": 3081 }, { "epoch": 0.44148402807620685, "grad_norm": 1.1022846698760986, "learning_rate": 0.00012360048727816448, "loss": 1.4378, "step": 3082 }, { "epoch": 0.4416272740295087, "grad_norm": 1.064237356185913, "learning_rate": 0.00012355539762471234, "loss": 1.5383, "step": 3083 }, { "epoch": 0.4417705199828105, "grad_norm": 1.0266364812850952, "learning_rate": 0.000123510302900366, "loss": 1.3943, "step": 3084 }, { "epoch": 0.4419137659361123, "grad_norm": 1.1215288639068604, "learning_rate": 0.00012346520311483318, "loss": 1.5223, "step": 3085 }, { "epoch": 0.4420570118894141, "grad_norm": 1.3592530488967896, "learning_rate": 0.00012342009827782284, "loss": 1.2945, "step": 3086 }, { "epoch": 0.44220025784271594, "grad_norm": 1.1827378273010254, "learning_rate": 0.00012337498839904492, "loss": 1.379, "step": 3087 }, { "epoch": 0.4423435037960178, "grad_norm": 1.3468207120895386, "learning_rate": 0.0001233298734882104, "loss": 1.4093, "step": 3088 }, { "epoch": 0.44248674974931956, "grad_norm": 1.1650348901748657, "learning_rate": 0.00012328475355503145, "loss": 1.3834, "step": 3089 }, { "epoch": 0.4426299957026214, "grad_norm": 1.31522798538208, "learning_rate": 0.0001232396286092213, "loss": 1.4305, "step": 3090 }, { "epoch": 0.44277324165592324, "grad_norm": 1.1427093744277954, "learning_rate": 0.00012319449866049416, "loss": 1.4521, "step": 3091 }, { "epoch": 0.44291648760922503, "grad_norm": 1.3706148862838745, "learning_rate": 0.00012314936371856543, "loss": 1.5094, "step": 3092 }, { "epoch": 0.44305973356252687, "grad_norm": 1.0026769638061523, "learning_rate": 0.00012310422379315162, "loss": 1.5859, "step": 3093 }, { "epoch": 0.44320297951582865, "grad_norm": 1.142472267150879, "learning_rate": 0.0001230590788939701, "loss": 1.5192, "step": 3094 }, { "epoch": 0.4433462254691305, "grad_norm": 1.11885666847229, "learning_rate": 0.00012301392903073954, "loss": 1.4231, "step": 3095 }, { "epoch": 0.44348947142243234, "grad_norm": 1.2014662027359009, "learning_rate": 0.0001229687742131796, "loss": 1.5498, "step": 3096 }, { "epoch": 0.4436327173757341, "grad_norm": 1.4500850439071655, "learning_rate": 0.0001229236144510109, "loss": 1.4122, "step": 3097 }, { "epoch": 0.44377596332903596, "grad_norm": 1.1103951930999756, "learning_rate": 0.0001228784497539553, "loss": 1.4943, "step": 3098 }, { "epoch": 0.4439192092823378, "grad_norm": 1.1672239303588867, "learning_rate": 0.00012283328013173563, "loss": 1.4736, "step": 3099 }, { "epoch": 0.4440624552356396, "grad_norm": 1.0671937465667725, "learning_rate": 0.00012278810559407578, "loss": 1.4096, "step": 3100 }, { "epoch": 0.4442057011889414, "grad_norm": 1.1332948207855225, "learning_rate": 0.00012274292615070068, "loss": 1.4526, "step": 3101 }, { "epoch": 0.4443489471422432, "grad_norm": 1.0210916996002197, "learning_rate": 0.00012269774181133643, "loss": 1.6282, "step": 3102 }, { "epoch": 0.44449219309554505, "grad_norm": 1.0761029720306396, "learning_rate": 0.00012265255258571005, "loss": 1.299, "step": 3103 }, { "epoch": 0.4446354390488469, "grad_norm": 0.9963146448135376, "learning_rate": 0.00012260735848354962, "loss": 1.3939, "step": 3104 }, { "epoch": 0.4447786850021487, "grad_norm": 1.0436007976531982, "learning_rate": 0.00012256215951458437, "loss": 1.562, "step": 3105 }, { "epoch": 0.4449219309554505, "grad_norm": 1.1525242328643799, "learning_rate": 0.00012251695568854453, "loss": 1.347, "step": 3106 }, { "epoch": 0.4450651769087523, "grad_norm": 1.0592950582504272, "learning_rate": 0.00012247174701516134, "loss": 1.3837, "step": 3107 }, { "epoch": 0.44520842286205414, "grad_norm": 1.1663986444473267, "learning_rate": 0.00012242653350416708, "loss": 1.4266, "step": 3108 }, { "epoch": 0.445351668815356, "grad_norm": 1.423520565032959, "learning_rate": 0.00012238131516529514, "loss": 1.448, "step": 3109 }, { "epoch": 0.44549491476865777, "grad_norm": 1.1414059400558472, "learning_rate": 0.00012233609200827986, "loss": 1.339, "step": 3110 }, { "epoch": 0.4456381607219596, "grad_norm": 1.3034290075302124, "learning_rate": 0.00012229086404285674, "loss": 1.403, "step": 3111 }, { "epoch": 0.44578140667526145, "grad_norm": 1.1641031503677368, "learning_rate": 0.00012224563127876222, "loss": 1.5103, "step": 3112 }, { "epoch": 0.44592465262856323, "grad_norm": 1.1962319612503052, "learning_rate": 0.00012220039372573373, "loss": 1.2811, "step": 3113 }, { "epoch": 0.4460678985818651, "grad_norm": 1.1172598600387573, "learning_rate": 0.0001221551513935099, "loss": 1.4634, "step": 3114 }, { "epoch": 0.44621114453516686, "grad_norm": 1.0949052572250366, "learning_rate": 0.0001221099042918302, "loss": 1.4911, "step": 3115 }, { "epoch": 0.4463543904884687, "grad_norm": 1.384107232093811, "learning_rate": 0.00012206465243043525, "loss": 1.4994, "step": 3116 }, { "epoch": 0.44649763644177054, "grad_norm": 1.1620814800262451, "learning_rate": 0.00012201939581906662, "loss": 1.4314, "step": 3117 }, { "epoch": 0.4466408823950723, "grad_norm": 1.1559195518493652, "learning_rate": 0.00012197413446746702, "loss": 1.4812, "step": 3118 }, { "epoch": 0.44678412834837417, "grad_norm": 1.0727044343948364, "learning_rate": 0.00012192886838538, "loss": 1.4508, "step": 3119 }, { "epoch": 0.44692737430167595, "grad_norm": 1.341267704963684, "learning_rate": 0.00012188359758255028, "loss": 1.5079, "step": 3120 }, { "epoch": 0.4470706202549778, "grad_norm": 1.0778058767318726, "learning_rate": 0.00012183832206872355, "loss": 1.4923, "step": 3121 }, { "epoch": 0.44721386620827963, "grad_norm": 1.0122088193893433, "learning_rate": 0.00012179304185364646, "loss": 1.5925, "step": 3122 }, { "epoch": 0.4473571121615814, "grad_norm": 1.0031112432479858, "learning_rate": 0.00012174775694706679, "loss": 1.516, "step": 3123 }, { "epoch": 0.44750035811488326, "grad_norm": 1.1362916231155396, "learning_rate": 0.00012170246735873321, "loss": 1.4884, "step": 3124 }, { "epoch": 0.4476436040681851, "grad_norm": 1.112246036529541, "learning_rate": 0.00012165717309839548, "loss": 1.4312, "step": 3125 }, { "epoch": 0.4477868500214869, "grad_norm": 1.0987281799316406, "learning_rate": 0.00012161187417580427, "loss": 1.5845, "step": 3126 }, { "epoch": 0.4479300959747887, "grad_norm": 1.0815194845199585, "learning_rate": 0.0001215665706007114, "loss": 1.4914, "step": 3127 }, { "epoch": 0.4480733419280905, "grad_norm": 1.2938653230667114, "learning_rate": 0.00012152126238286953, "loss": 1.5168, "step": 3128 }, { "epoch": 0.44821658788139235, "grad_norm": 1.1902167797088623, "learning_rate": 0.00012147594953203247, "loss": 1.5087, "step": 3129 }, { "epoch": 0.4483598338346942, "grad_norm": 1.146147608757019, "learning_rate": 0.0001214306320579549, "loss": 1.448, "step": 3130 }, { "epoch": 0.448503079787996, "grad_norm": 1.1689090728759766, "learning_rate": 0.00012138530997039259, "loss": 1.6199, "step": 3131 }, { "epoch": 0.4486463257412978, "grad_norm": 1.1992183923721313, "learning_rate": 0.00012133998327910225, "loss": 1.4455, "step": 3132 }, { "epoch": 0.44878957169459965, "grad_norm": 1.2008085250854492, "learning_rate": 0.00012129465199384157, "loss": 1.5091, "step": 3133 }, { "epoch": 0.44893281764790144, "grad_norm": 1.1654683351516724, "learning_rate": 0.00012124931612436932, "loss": 1.494, "step": 3134 }, { "epoch": 0.4490760636012033, "grad_norm": 1.1424264907836914, "learning_rate": 0.0001212039756804451, "loss": 1.4086, "step": 3135 }, { "epoch": 0.44921930955450506, "grad_norm": 1.245821475982666, "learning_rate": 0.00012115863067182967, "loss": 1.4057, "step": 3136 }, { "epoch": 0.4493625555078069, "grad_norm": 1.4618412256240845, "learning_rate": 0.00012111328110828464, "loss": 1.3526, "step": 3137 }, { "epoch": 0.44950580146110874, "grad_norm": 1.0155736207962036, "learning_rate": 0.00012106792699957263, "loss": 1.6852, "step": 3138 }, { "epoch": 0.44964904741441053, "grad_norm": 1.0592164993286133, "learning_rate": 0.00012102256835545734, "loss": 1.3375, "step": 3139 }, { "epoch": 0.44979229336771237, "grad_norm": 1.0476815700531006, "learning_rate": 0.00012097720518570326, "loss": 1.441, "step": 3140 }, { "epoch": 0.44993553932101416, "grad_norm": 1.0410012006759644, "learning_rate": 0.00012093183750007606, "loss": 1.4251, "step": 3141 }, { "epoch": 0.450078785274316, "grad_norm": 1.078812599182129, "learning_rate": 0.00012088646530834218, "loss": 1.5098, "step": 3142 }, { "epoch": 0.45022203122761784, "grad_norm": 1.173561930656433, "learning_rate": 0.0001208410886202692, "loss": 1.4976, "step": 3143 }, { "epoch": 0.4503652771809196, "grad_norm": 0.9869365692138672, "learning_rate": 0.00012079570744562558, "loss": 1.3745, "step": 3144 }, { "epoch": 0.45050852313422146, "grad_norm": 1.0315371751785278, "learning_rate": 0.00012075032179418076, "loss": 1.4201, "step": 3145 }, { "epoch": 0.4506517690875233, "grad_norm": 1.137511968612671, "learning_rate": 0.00012070493167570516, "loss": 1.6021, "step": 3146 }, { "epoch": 0.4507950150408251, "grad_norm": 1.209009051322937, "learning_rate": 0.00012065953709997009, "loss": 1.5155, "step": 3147 }, { "epoch": 0.4509382609941269, "grad_norm": 1.0661952495574951, "learning_rate": 0.00012061413807674797, "loss": 1.3594, "step": 3148 }, { "epoch": 0.4510815069474287, "grad_norm": 0.9781462550163269, "learning_rate": 0.00012056873461581204, "loss": 1.4059, "step": 3149 }, { "epoch": 0.45122475290073055, "grad_norm": 1.1417714357376099, "learning_rate": 0.00012052332672693656, "loss": 1.3588, "step": 3150 }, { "epoch": 0.4513679988540324, "grad_norm": 1.4227643013000488, "learning_rate": 0.00012047791441989665, "loss": 1.6042, "step": 3151 }, { "epoch": 0.4515112448073342, "grad_norm": 1.178970456123352, "learning_rate": 0.00012043249770446856, "loss": 1.3268, "step": 3152 }, { "epoch": 0.451654490760636, "grad_norm": 1.204473614692688, "learning_rate": 0.00012038707659042934, "loss": 1.5764, "step": 3153 }, { "epoch": 0.45179773671393786, "grad_norm": 0.9864242076873779, "learning_rate": 0.00012034165108755702, "loss": 1.5221, "step": 3154 }, { "epoch": 0.45194098266723964, "grad_norm": 1.1069221496582031, "learning_rate": 0.0001202962212056306, "loss": 1.544, "step": 3155 }, { "epoch": 0.4520842286205415, "grad_norm": 0.9879958629608154, "learning_rate": 0.00012025078695442999, "loss": 1.4061, "step": 3156 }, { "epoch": 0.45222747457384327, "grad_norm": 1.103913426399231, "learning_rate": 0.00012020534834373612, "loss": 1.6199, "step": 3157 }, { "epoch": 0.4523707205271451, "grad_norm": 1.313738226890564, "learning_rate": 0.00012015990538333073, "loss": 1.6016, "step": 3158 }, { "epoch": 0.45251396648044695, "grad_norm": 1.0389853715896606, "learning_rate": 0.00012011445808299659, "loss": 1.5377, "step": 3159 }, { "epoch": 0.45265721243374873, "grad_norm": 1.0367650985717773, "learning_rate": 0.0001200690064525174, "loss": 1.3871, "step": 3160 }, { "epoch": 0.4528004583870506, "grad_norm": 1.2739322185516357, "learning_rate": 0.00012002355050167776, "loss": 1.2641, "step": 3161 }, { "epoch": 0.45294370434035236, "grad_norm": 1.089424967765808, "learning_rate": 0.00011997809024026316, "loss": 1.4099, "step": 3162 }, { "epoch": 0.4530869502936542, "grad_norm": 1.3017481565475464, "learning_rate": 0.00011993262567806012, "loss": 1.4638, "step": 3163 }, { "epoch": 0.45323019624695604, "grad_norm": 1.4463725090026855, "learning_rate": 0.00011988715682485602, "loss": 1.4422, "step": 3164 }, { "epoch": 0.4533734422002578, "grad_norm": 0.990341067314148, "learning_rate": 0.00011984168369043922, "loss": 1.5195, "step": 3165 }, { "epoch": 0.45351668815355967, "grad_norm": 1.113857388496399, "learning_rate": 0.00011979620628459893, "loss": 1.5846, "step": 3166 }, { "epoch": 0.4536599341068615, "grad_norm": 1.1528223752975464, "learning_rate": 0.00011975072461712527, "loss": 1.5592, "step": 3167 }, { "epoch": 0.4538031800601633, "grad_norm": 1.2071025371551514, "learning_rate": 0.00011970523869780938, "loss": 1.278, "step": 3168 }, { "epoch": 0.45394642601346513, "grad_norm": 1.0715287923812866, "learning_rate": 0.00011965974853644321, "loss": 1.3596, "step": 3169 }, { "epoch": 0.4540896719667669, "grad_norm": 1.0038518905639648, "learning_rate": 0.0001196142541428197, "loss": 1.5137, "step": 3170 }, { "epoch": 0.45423291792006876, "grad_norm": 0.8300107717514038, "learning_rate": 0.00011956875552673268, "loss": 1.4952, "step": 3171 }, { "epoch": 0.4543761638733706, "grad_norm": 0.9304805994033813, "learning_rate": 0.00011952325269797677, "loss": 1.361, "step": 3172 }, { "epoch": 0.4545194098266724, "grad_norm": 0.8712587952613831, "learning_rate": 0.00011947774566634775, "loss": 1.5969, "step": 3173 }, { "epoch": 0.4546626557799742, "grad_norm": 1.1067285537719727, "learning_rate": 0.00011943223444164205, "loss": 1.5286, "step": 3174 }, { "epoch": 0.454805901733276, "grad_norm": 1.157167911529541, "learning_rate": 0.00011938671903365717, "loss": 1.3667, "step": 3175 }, { "epoch": 0.45494914768657785, "grad_norm": 0.8720133304595947, "learning_rate": 0.0001193411994521914, "loss": 1.4309, "step": 3176 }, { "epoch": 0.4550923936398797, "grad_norm": 1.030298113822937, "learning_rate": 0.00011929567570704403, "loss": 1.3805, "step": 3177 }, { "epoch": 0.4552356395931815, "grad_norm": 1.0273535251617432, "learning_rate": 0.00011925014780801516, "loss": 1.3773, "step": 3178 }, { "epoch": 0.4553788855464833, "grad_norm": 0.9274500012397766, "learning_rate": 0.00011920461576490584, "loss": 1.4026, "step": 3179 }, { "epoch": 0.45552213149978515, "grad_norm": 1.0567808151245117, "learning_rate": 0.000119159079587518, "loss": 1.5002, "step": 3180 }, { "epoch": 0.45566537745308694, "grad_norm": 1.0315468311309814, "learning_rate": 0.0001191135392856544, "loss": 1.5916, "step": 3181 }, { "epoch": 0.4558086234063888, "grad_norm": 1.203462839126587, "learning_rate": 0.00011906799486911884, "loss": 1.5059, "step": 3182 }, { "epoch": 0.45595186935969056, "grad_norm": 1.271903157234192, "learning_rate": 0.0001190224463477158, "loss": 1.389, "step": 3183 }, { "epoch": 0.4560951153129924, "grad_norm": 1.1927798986434937, "learning_rate": 0.00011897689373125081, "loss": 1.3348, "step": 3184 }, { "epoch": 0.45623836126629425, "grad_norm": 1.182523250579834, "learning_rate": 0.00011893133702953018, "loss": 1.4619, "step": 3185 }, { "epoch": 0.45638160721959603, "grad_norm": 1.153469204902649, "learning_rate": 0.00011888577625236122, "loss": 1.3313, "step": 3186 }, { "epoch": 0.45652485317289787, "grad_norm": 1.1265850067138672, "learning_rate": 0.00011884021140955193, "loss": 1.4595, "step": 3187 }, { "epoch": 0.4566680991261997, "grad_norm": 0.9332097172737122, "learning_rate": 0.00011879464251091135, "loss": 1.495, "step": 3188 }, { "epoch": 0.4568113450795015, "grad_norm": 1.0890226364135742, "learning_rate": 0.00011874906956624934, "loss": 1.6299, "step": 3189 }, { "epoch": 0.45695459103280334, "grad_norm": 1.1782417297363281, "learning_rate": 0.00011870349258537663, "loss": 1.424, "step": 3190 }, { "epoch": 0.4570978369861051, "grad_norm": 1.16672682762146, "learning_rate": 0.00011865791157810482, "loss": 1.4099, "step": 3191 }, { "epoch": 0.45724108293940696, "grad_norm": 1.0685315132141113, "learning_rate": 0.00011861232655424633, "loss": 1.4614, "step": 3192 }, { "epoch": 0.4573843288927088, "grad_norm": 1.0267964601516724, "learning_rate": 0.00011856673752361453, "loss": 1.5197, "step": 3193 }, { "epoch": 0.4575275748460106, "grad_norm": 0.9944993853569031, "learning_rate": 0.00011852114449602358, "loss": 1.4405, "step": 3194 }, { "epoch": 0.45767082079931243, "grad_norm": 1.0819752216339111, "learning_rate": 0.0001184755474812886, "loss": 1.1958, "step": 3195 }, { "epoch": 0.4578140667526142, "grad_norm": 1.08761465549469, "learning_rate": 0.00011842994648922536, "loss": 1.5107, "step": 3196 }, { "epoch": 0.45795731270591605, "grad_norm": 1.0154162645339966, "learning_rate": 0.00011838434152965079, "loss": 1.4732, "step": 3197 }, { "epoch": 0.4581005586592179, "grad_norm": 1.2753942012786865, "learning_rate": 0.0001183387326123824, "loss": 1.6183, "step": 3198 }, { "epoch": 0.4582438046125197, "grad_norm": 0.9973735809326172, "learning_rate": 0.00011829311974723867, "loss": 1.3681, "step": 3199 }, { "epoch": 0.4583870505658215, "grad_norm": 1.1050772666931152, "learning_rate": 0.00011824750294403899, "loss": 1.5178, "step": 3200 }, { "epoch": 0.45853029651912336, "grad_norm": 1.323988676071167, "learning_rate": 0.00011820188221260341, "loss": 1.3708, "step": 3201 }, { "epoch": 0.45867354247242514, "grad_norm": 1.262052059173584, "learning_rate": 0.0001181562575627531, "loss": 1.1791, "step": 3202 }, { "epoch": 0.458816788425727, "grad_norm": 1.0263917446136475, "learning_rate": 0.00011811062900430978, "loss": 1.388, "step": 3203 }, { "epoch": 0.45896003437902877, "grad_norm": 1.0436040163040161, "learning_rate": 0.00011806499654709621, "loss": 1.5122, "step": 3204 }, { "epoch": 0.4591032803323306, "grad_norm": 0.9282182455062866, "learning_rate": 0.00011801936020093594, "loss": 1.379, "step": 3205 }, { "epoch": 0.45924652628563245, "grad_norm": 1.119687557220459, "learning_rate": 0.00011797371997565332, "loss": 1.5841, "step": 3206 }, { "epoch": 0.45938977223893424, "grad_norm": 1.2043997049331665, "learning_rate": 0.00011792807588107357, "loss": 1.2598, "step": 3207 }, { "epoch": 0.4595330181922361, "grad_norm": 1.0688637495040894, "learning_rate": 0.00011788242792702275, "loss": 1.4565, "step": 3208 }, { "epoch": 0.4596762641455379, "grad_norm": 0.9249189496040344, "learning_rate": 0.00011783677612332769, "loss": 1.4079, "step": 3209 }, { "epoch": 0.4598195100988397, "grad_norm": 1.0595513582229614, "learning_rate": 0.00011779112047981613, "loss": 1.4279, "step": 3210 }, { "epoch": 0.45996275605214154, "grad_norm": 0.9271141290664673, "learning_rate": 0.00011774546100631662, "loss": 1.4195, "step": 3211 }, { "epoch": 0.4601060020054433, "grad_norm": 1.0756725072860718, "learning_rate": 0.00011769979771265846, "loss": 1.4055, "step": 3212 }, { "epoch": 0.46024924795874517, "grad_norm": 1.086134433746338, "learning_rate": 0.00011765413060867185, "loss": 1.4966, "step": 3213 }, { "epoch": 0.460392493912047, "grad_norm": 1.2458773851394653, "learning_rate": 0.00011760845970418782, "loss": 1.3551, "step": 3214 }, { "epoch": 0.4605357398653488, "grad_norm": 1.055637001991272, "learning_rate": 0.00011756278500903812, "loss": 1.4505, "step": 3215 }, { "epoch": 0.46067898581865063, "grad_norm": 1.1264960765838623, "learning_rate": 0.00011751710653305546, "loss": 1.5455, "step": 3216 }, { "epoch": 0.4608222317719524, "grad_norm": 0.9137976765632629, "learning_rate": 0.00011747142428607318, "loss": 1.5716, "step": 3217 }, { "epoch": 0.46096547772525426, "grad_norm": 1.4584661722183228, "learning_rate": 0.00011742573827792567, "loss": 1.438, "step": 3218 }, { "epoch": 0.4611087236785561, "grad_norm": 1.1215171813964844, "learning_rate": 0.0001173800485184479, "loss": 1.2736, "step": 3219 }, { "epoch": 0.4612519696318579, "grad_norm": 1.2485909461975098, "learning_rate": 0.00011733435501747578, "loss": 1.4094, "step": 3220 }, { "epoch": 0.4613952155851597, "grad_norm": 1.0997424125671387, "learning_rate": 0.00011728865778484597, "loss": 1.3956, "step": 3221 }, { "epoch": 0.46153846153846156, "grad_norm": 0.9940543174743652, "learning_rate": 0.00011724295683039599, "loss": 1.5838, "step": 3222 }, { "epoch": 0.46168170749176335, "grad_norm": 1.0352367162704468, "learning_rate": 0.00011719725216396409, "loss": 1.4961, "step": 3223 }, { "epoch": 0.4618249534450652, "grad_norm": 1.2422566413879395, "learning_rate": 0.00011715154379538935, "loss": 1.3742, "step": 3224 }, { "epoch": 0.461968199398367, "grad_norm": 1.1647753715515137, "learning_rate": 0.0001171058317345117, "loss": 1.408, "step": 3225 }, { "epoch": 0.4621114453516688, "grad_norm": 1.2552604675292969, "learning_rate": 0.00011706011599117173, "loss": 1.6675, "step": 3226 }, { "epoch": 0.46225469130497066, "grad_norm": 1.030846118927002, "learning_rate": 0.000117014396575211, "loss": 1.3646, "step": 3227 }, { "epoch": 0.46239793725827244, "grad_norm": 1.085591435432434, "learning_rate": 0.00011696867349647171, "loss": 1.4564, "step": 3228 }, { "epoch": 0.4625411832115743, "grad_norm": 1.2605788707733154, "learning_rate": 0.00011692294676479696, "loss": 1.474, "step": 3229 }, { "epoch": 0.46268442916487607, "grad_norm": 0.988599956035614, "learning_rate": 0.00011687721639003051, "loss": 1.3967, "step": 3230 }, { "epoch": 0.4628276751181779, "grad_norm": 1.2275782823562622, "learning_rate": 0.00011683148238201704, "loss": 1.361, "step": 3231 }, { "epoch": 0.46297092107147975, "grad_norm": 1.096309781074524, "learning_rate": 0.00011678574475060191, "loss": 1.2918, "step": 3232 }, { "epoch": 0.46311416702478153, "grad_norm": 1.1264857053756714, "learning_rate": 0.00011674000350563133, "loss": 1.2893, "step": 3233 }, { "epoch": 0.46325741297808337, "grad_norm": 1.0949419736862183, "learning_rate": 0.00011669425865695223, "loss": 1.4873, "step": 3234 }, { "epoch": 0.4634006589313852, "grad_norm": 1.0669584274291992, "learning_rate": 0.00011664851021441237, "loss": 1.4774, "step": 3235 }, { "epoch": 0.463543904884687, "grad_norm": 1.2258151769638062, "learning_rate": 0.00011660275818786027, "loss": 1.443, "step": 3236 }, { "epoch": 0.46368715083798884, "grad_norm": 1.3936161994934082, "learning_rate": 0.00011655700258714517, "loss": 1.4439, "step": 3237 }, { "epoch": 0.4638303967912906, "grad_norm": 1.1493662595748901, "learning_rate": 0.00011651124342211712, "loss": 1.4592, "step": 3238 }, { "epoch": 0.46397364274459246, "grad_norm": 1.0491929054260254, "learning_rate": 0.00011646548070262695, "loss": 1.4272, "step": 3239 }, { "epoch": 0.4641168886978943, "grad_norm": 1.2662327289581299, "learning_rate": 0.00011641971443852627, "loss": 1.4197, "step": 3240 }, { "epoch": 0.4642601346511961, "grad_norm": 1.0402677059173584, "learning_rate": 0.00011637394463966737, "loss": 1.3199, "step": 3241 }, { "epoch": 0.46440338060449793, "grad_norm": 1.0888824462890625, "learning_rate": 0.00011632817131590339, "loss": 1.5224, "step": 3242 }, { "epoch": 0.46454662655779977, "grad_norm": 0.9686537384986877, "learning_rate": 0.0001162823944770882, "loss": 1.3672, "step": 3243 }, { "epoch": 0.46468987251110155, "grad_norm": 1.2487537860870361, "learning_rate": 0.00011623661413307639, "loss": 1.3844, "step": 3244 }, { "epoch": 0.4648331184644034, "grad_norm": 1.1117708683013916, "learning_rate": 0.00011619083029372338, "loss": 1.2275, "step": 3245 }, { "epoch": 0.4649763644177052, "grad_norm": 1.2130790948867798, "learning_rate": 0.0001161450429688852, "loss": 1.5362, "step": 3246 }, { "epoch": 0.465119610371007, "grad_norm": 1.1776996850967407, "learning_rate": 0.00011609925216841886, "loss": 1.503, "step": 3247 }, { "epoch": 0.46526285632430886, "grad_norm": 1.0639768838882446, "learning_rate": 0.00011605345790218189, "loss": 1.2413, "step": 3248 }, { "epoch": 0.46540610227761064, "grad_norm": 1.097795009613037, "learning_rate": 0.0001160076601800327, "loss": 1.6393, "step": 3249 }, { "epoch": 0.4655493482309125, "grad_norm": 0.9664180874824524, "learning_rate": 0.00011596185901183043, "loss": 1.5207, "step": 3250 }, { "epoch": 0.46569259418421427, "grad_norm": 1.0399538278579712, "learning_rate": 0.00011591605440743488, "loss": 1.6099, "step": 3251 }, { "epoch": 0.4658358401375161, "grad_norm": 1.1419068574905396, "learning_rate": 0.00011587024637670669, "loss": 1.4907, "step": 3252 }, { "epoch": 0.46597908609081795, "grad_norm": 0.92630535364151, "learning_rate": 0.00011582443492950716, "loss": 1.4683, "step": 3253 }, { "epoch": 0.46612233204411974, "grad_norm": 1.26869535446167, "learning_rate": 0.00011577862007569842, "loss": 1.5627, "step": 3254 }, { "epoch": 0.4662655779974216, "grad_norm": 1.1297804117202759, "learning_rate": 0.00011573280182514321, "loss": 1.5184, "step": 3255 }, { "epoch": 0.4664088239507234, "grad_norm": 1.5263715982437134, "learning_rate": 0.00011568698018770512, "loss": 1.4713, "step": 3256 }, { "epoch": 0.4665520699040252, "grad_norm": 1.0389893054962158, "learning_rate": 0.00011564115517324836, "loss": 1.5073, "step": 3257 }, { "epoch": 0.46669531585732704, "grad_norm": 1.1668983697891235, "learning_rate": 0.00011559532679163796, "loss": 1.3207, "step": 3258 }, { "epoch": 0.4668385618106288, "grad_norm": 1.0060150623321533, "learning_rate": 0.00011554949505273962, "loss": 1.2718, "step": 3259 }, { "epoch": 0.46698180776393067, "grad_norm": 1.1631263494491577, "learning_rate": 0.00011550365996641979, "loss": 1.3085, "step": 3260 }, { "epoch": 0.4671250537172325, "grad_norm": 1.244558334350586, "learning_rate": 0.00011545782154254565, "loss": 1.5046, "step": 3261 }, { "epoch": 0.4672682996705343, "grad_norm": 1.0249354839324951, "learning_rate": 0.00011541197979098501, "loss": 1.4909, "step": 3262 }, { "epoch": 0.46741154562383613, "grad_norm": 1.0131266117095947, "learning_rate": 0.00011536613472160653, "loss": 1.2331, "step": 3263 }, { "epoch": 0.4675547915771379, "grad_norm": 1.1467456817626953, "learning_rate": 0.00011532028634427949, "loss": 1.4885, "step": 3264 }, { "epoch": 0.46769803753043976, "grad_norm": 0.9395096302032471, "learning_rate": 0.00011527443466887393, "loss": 1.1577, "step": 3265 }, { "epoch": 0.4678412834837416, "grad_norm": 1.2805176973342896, "learning_rate": 0.00011522857970526058, "loss": 1.4491, "step": 3266 }, { "epoch": 0.4679845294370434, "grad_norm": 1.0709673166275024, "learning_rate": 0.00011518272146331082, "loss": 1.3827, "step": 3267 }, { "epoch": 0.4681277753903452, "grad_norm": 1.2241272926330566, "learning_rate": 0.00011513685995289689, "loss": 1.5112, "step": 3268 }, { "epoch": 0.46827102134364706, "grad_norm": 1.0638458728790283, "learning_rate": 0.00011509099518389156, "loss": 1.4051, "step": 3269 }, { "epoch": 0.46841426729694885, "grad_norm": 1.0161278247833252, "learning_rate": 0.00011504512716616846, "loss": 1.5547, "step": 3270 }, { "epoch": 0.4685575132502507, "grad_norm": 0.9485410451889038, "learning_rate": 0.00011499925590960172, "loss": 1.4866, "step": 3271 }, { "epoch": 0.4687007592035525, "grad_norm": 1.0508158206939697, "learning_rate": 0.00011495338142406643, "loss": 1.3563, "step": 3272 }, { "epoch": 0.4688440051568543, "grad_norm": 1.0126726627349854, "learning_rate": 0.00011490750371943813, "loss": 1.2948, "step": 3273 }, { "epoch": 0.46898725111015616, "grad_norm": 1.0145466327667236, "learning_rate": 0.00011486162280559316, "loss": 1.5088, "step": 3274 }, { "epoch": 0.46913049706345794, "grad_norm": 1.1184942722320557, "learning_rate": 0.0001148157386924086, "loss": 1.2719, "step": 3275 }, { "epoch": 0.4692737430167598, "grad_norm": 1.19002366065979, "learning_rate": 0.00011476985138976209, "loss": 1.4071, "step": 3276 }, { "epoch": 0.4694169889700616, "grad_norm": 1.0520473718643188, "learning_rate": 0.0001147239609075321, "loss": 1.354, "step": 3277 }, { "epoch": 0.4695602349233634, "grad_norm": 1.0657501220703125, "learning_rate": 0.00011467806725559769, "loss": 1.3836, "step": 3278 }, { "epoch": 0.46970348087666525, "grad_norm": 1.15292227268219, "learning_rate": 0.00011463217044383865, "loss": 1.4323, "step": 3279 }, { "epoch": 0.46984672682996703, "grad_norm": 1.1347779035568237, "learning_rate": 0.00011458627048213535, "loss": 1.3662, "step": 3280 }, { "epoch": 0.4699899727832689, "grad_norm": 1.0751999616622925, "learning_rate": 0.00011454036738036899, "loss": 1.338, "step": 3281 }, { "epoch": 0.4701332187365707, "grad_norm": 1.1605064868927002, "learning_rate": 0.00011449446114842137, "loss": 1.3779, "step": 3282 }, { "epoch": 0.4702764646898725, "grad_norm": 0.9580484628677368, "learning_rate": 0.00011444855179617493, "loss": 1.6082, "step": 3283 }, { "epoch": 0.47041971064317434, "grad_norm": 1.0737966299057007, "learning_rate": 0.00011440263933351283, "loss": 1.4823, "step": 3284 }, { "epoch": 0.4705629565964761, "grad_norm": 1.0176937580108643, "learning_rate": 0.00011435672377031889, "loss": 1.4786, "step": 3285 }, { "epoch": 0.47070620254977796, "grad_norm": 1.1679856777191162, "learning_rate": 0.00011431080511647763, "loss": 1.2433, "step": 3286 }, { "epoch": 0.4708494485030798, "grad_norm": 1.0988603830337524, "learning_rate": 0.00011426488338187414, "loss": 1.4521, "step": 3287 }, { "epoch": 0.4709926944563816, "grad_norm": 1.0639088153839111, "learning_rate": 0.00011421895857639424, "loss": 1.4075, "step": 3288 }, { "epoch": 0.47113594040968343, "grad_norm": 0.9400182962417603, "learning_rate": 0.00011417303070992445, "loss": 1.4765, "step": 3289 }, { "epoch": 0.47127918636298527, "grad_norm": 1.0353741645812988, "learning_rate": 0.00011412709979235187, "loss": 1.4377, "step": 3290 }, { "epoch": 0.47142243231628705, "grad_norm": 1.0382957458496094, "learning_rate": 0.0001140811658335643, "loss": 1.4198, "step": 3291 }, { "epoch": 0.4715656782695889, "grad_norm": 1.119065761566162, "learning_rate": 0.00011403522884345017, "loss": 1.5738, "step": 3292 }, { "epoch": 0.4717089242228907, "grad_norm": 0.8763836026191711, "learning_rate": 0.00011398928883189859, "loss": 1.469, "step": 3293 }, { "epoch": 0.4718521701761925, "grad_norm": 0.9945670962333679, "learning_rate": 0.00011394334580879931, "loss": 1.4831, "step": 3294 }, { "epoch": 0.47199541612949436, "grad_norm": 1.1996240615844727, "learning_rate": 0.00011389739978404273, "loss": 1.499, "step": 3295 }, { "epoch": 0.47213866208279615, "grad_norm": 1.0718663930892944, "learning_rate": 0.00011385145076751986, "loss": 1.3571, "step": 3296 }, { "epoch": 0.472281908036098, "grad_norm": 1.35163152217865, "learning_rate": 0.00011380549876912244, "loss": 1.5648, "step": 3297 }, { "epoch": 0.4724251539893998, "grad_norm": 1.1435168981552124, "learning_rate": 0.00011375954379874274, "loss": 1.369, "step": 3298 }, { "epoch": 0.4725683999427016, "grad_norm": 0.876053512096405, "learning_rate": 0.00011371358586627376, "loss": 1.3048, "step": 3299 }, { "epoch": 0.47271164589600345, "grad_norm": 0.9831293821334839, "learning_rate": 0.00011366762498160914, "loss": 1.3607, "step": 3300 }, { "epoch": 0.47285489184930524, "grad_norm": 1.059891939163208, "learning_rate": 0.00011362166115464304, "loss": 1.5376, "step": 3301 }, { "epoch": 0.4729981378026071, "grad_norm": 1.1816636323928833, "learning_rate": 0.00011357569439527038, "loss": 1.4659, "step": 3302 }, { "epoch": 0.4731413837559089, "grad_norm": 1.1875196695327759, "learning_rate": 0.00011352972471338668, "loss": 1.2874, "step": 3303 }, { "epoch": 0.4732846297092107, "grad_norm": 1.2097115516662598, "learning_rate": 0.00011348375211888807, "loss": 1.4972, "step": 3304 }, { "epoch": 0.47342787566251254, "grad_norm": 1.065178632736206, "learning_rate": 0.00011343777662167126, "loss": 1.3953, "step": 3305 }, { "epoch": 0.4735711216158143, "grad_norm": 1.0165363550186157, "learning_rate": 0.0001133917982316337, "loss": 1.6532, "step": 3306 }, { "epoch": 0.47371436756911617, "grad_norm": 1.3845362663269043, "learning_rate": 0.00011334581695867339, "loss": 1.3454, "step": 3307 }, { "epoch": 0.473857613522418, "grad_norm": 1.085850715637207, "learning_rate": 0.00011329983281268892, "loss": 1.5056, "step": 3308 }, { "epoch": 0.4740008594757198, "grad_norm": 0.8846843242645264, "learning_rate": 0.00011325384580357957, "loss": 1.519, "step": 3309 }, { "epoch": 0.47414410542902163, "grad_norm": 1.1567100286483765, "learning_rate": 0.00011320785594124518, "loss": 1.6023, "step": 3310 }, { "epoch": 0.4742873513823235, "grad_norm": 1.1920970678329468, "learning_rate": 0.00011316186323558631, "loss": 1.3781, "step": 3311 }, { "epoch": 0.47443059733562526, "grad_norm": 1.0693937540054321, "learning_rate": 0.00011311586769650395, "loss": 1.6848, "step": 3312 }, { "epoch": 0.4745738432889271, "grad_norm": 1.0067671537399292, "learning_rate": 0.00011306986933389984, "loss": 1.4515, "step": 3313 }, { "epoch": 0.4747170892422289, "grad_norm": 1.185122013092041, "learning_rate": 0.00011302386815767629, "loss": 1.6257, "step": 3314 }, { "epoch": 0.4748603351955307, "grad_norm": 1.0342673063278198, "learning_rate": 0.00011297786417773626, "loss": 1.3211, "step": 3315 }, { "epoch": 0.47500358114883257, "grad_norm": 1.136813759803772, "learning_rate": 0.00011293185740398317, "loss": 1.419, "step": 3316 }, { "epoch": 0.47514682710213435, "grad_norm": 0.9932136535644531, "learning_rate": 0.00011288584784632124, "loss": 1.4535, "step": 3317 }, { "epoch": 0.4752900730554362, "grad_norm": 1.0629267692565918, "learning_rate": 0.00011283983551465511, "loss": 1.4108, "step": 3318 }, { "epoch": 0.475433319008738, "grad_norm": 1.08879554271698, "learning_rate": 0.00011279382041889013, "loss": 1.4213, "step": 3319 }, { "epoch": 0.4755765649620398, "grad_norm": 1.1810017824172974, "learning_rate": 0.00011274780256893225, "loss": 1.4481, "step": 3320 }, { "epoch": 0.47571981091534166, "grad_norm": 1.1317284107208252, "learning_rate": 0.00011270178197468789, "loss": 1.3384, "step": 3321 }, { "epoch": 0.47586305686864344, "grad_norm": 1.1286649703979492, "learning_rate": 0.00011265575864606421, "loss": 1.3778, "step": 3322 }, { "epoch": 0.4760063028219453, "grad_norm": 1.1449493169784546, "learning_rate": 0.00011260973259296888, "loss": 1.2873, "step": 3323 }, { "epoch": 0.4761495487752471, "grad_norm": 1.3122366666793823, "learning_rate": 0.00011256370382531017, "loss": 1.6179, "step": 3324 }, { "epoch": 0.4762927947285489, "grad_norm": 0.9566621780395508, "learning_rate": 0.00011251767235299688, "loss": 1.4554, "step": 3325 }, { "epoch": 0.47643604068185075, "grad_norm": 0.8679423928260803, "learning_rate": 0.00011247163818593856, "loss": 1.3446, "step": 3326 }, { "epoch": 0.47657928663515253, "grad_norm": 1.2780513763427734, "learning_rate": 0.00011242560133404513, "loss": 1.5271, "step": 3327 }, { "epoch": 0.4767225325884544, "grad_norm": 1.3613779544830322, "learning_rate": 0.00011237956180722722, "loss": 1.4222, "step": 3328 }, { "epoch": 0.4768657785417562, "grad_norm": 0.9809669852256775, "learning_rate": 0.00011233351961539605, "loss": 1.4102, "step": 3329 }, { "epoch": 0.477009024495058, "grad_norm": 1.221941351890564, "learning_rate": 0.00011228747476846322, "loss": 1.5474, "step": 3330 }, { "epoch": 0.47715227044835984, "grad_norm": 1.1364434957504272, "learning_rate": 0.00011224142727634122, "loss": 1.5003, "step": 3331 }, { "epoch": 0.4772955164016617, "grad_norm": 1.062312126159668, "learning_rate": 0.00011219537714894282, "loss": 1.4746, "step": 3332 }, { "epoch": 0.47743876235496346, "grad_norm": 0.9702563881874084, "learning_rate": 0.00011214932439618151, "loss": 1.6234, "step": 3333 }, { "epoch": 0.4775820083082653, "grad_norm": 1.1632903814315796, "learning_rate": 0.00011210326902797131, "loss": 1.4115, "step": 3334 }, { "epoch": 0.4777252542615671, "grad_norm": 1.0407432317733765, "learning_rate": 0.00011205721105422679, "loss": 1.3137, "step": 3335 }, { "epoch": 0.47786850021486893, "grad_norm": 0.9921265244483948, "learning_rate": 0.00011201115048486313, "loss": 1.3385, "step": 3336 }, { "epoch": 0.47801174616817077, "grad_norm": 1.224592924118042, "learning_rate": 0.00011196508732979599, "loss": 1.443, "step": 3337 }, { "epoch": 0.47815499212147256, "grad_norm": 0.97806316614151, "learning_rate": 0.00011191902159894161, "loss": 1.505, "step": 3338 }, { "epoch": 0.4782982380747744, "grad_norm": 1.1465346813201904, "learning_rate": 0.00011187295330221686, "loss": 1.5355, "step": 3339 }, { "epoch": 0.4784414840280762, "grad_norm": 1.0608819723129272, "learning_rate": 0.00011182688244953907, "loss": 1.5581, "step": 3340 }, { "epoch": 0.478584729981378, "grad_norm": 1.2137082815170288, "learning_rate": 0.00011178080905082615, "loss": 1.4183, "step": 3341 }, { "epoch": 0.47872797593467986, "grad_norm": 0.9721546769142151, "learning_rate": 0.00011173473311599656, "loss": 1.3798, "step": 3342 }, { "epoch": 0.47887122188798165, "grad_norm": 1.270617961883545, "learning_rate": 0.00011168865465496932, "loss": 1.4943, "step": 3343 }, { "epoch": 0.4790144678412835, "grad_norm": 1.1930001974105835, "learning_rate": 0.000111642573677664, "loss": 1.3323, "step": 3344 }, { "epoch": 0.4791577137945853, "grad_norm": 1.035975456237793, "learning_rate": 0.00011159649019400069, "loss": 1.4723, "step": 3345 }, { "epoch": 0.4793009597478871, "grad_norm": 1.2159650325775146, "learning_rate": 0.00011155040421389996, "loss": 1.3959, "step": 3346 }, { "epoch": 0.47944420570118895, "grad_norm": 1.190435767173767, "learning_rate": 0.00011150431574728308, "loss": 1.4425, "step": 3347 }, { "epoch": 0.47958745165449074, "grad_norm": 1.0722824335098267, "learning_rate": 0.00011145822480407168, "loss": 1.3259, "step": 3348 }, { "epoch": 0.4797306976077926, "grad_norm": 1.1439940929412842, "learning_rate": 0.00011141213139418805, "loss": 1.6281, "step": 3349 }, { "epoch": 0.4798739435610944, "grad_norm": 1.2694685459136963, "learning_rate": 0.00011136603552755489, "loss": 1.4223, "step": 3350 }, { "epoch": 0.4800171895143962, "grad_norm": 1.099033236503601, "learning_rate": 0.00011131993721409559, "loss": 1.5037, "step": 3351 }, { "epoch": 0.48016043546769804, "grad_norm": 0.9075151681900024, "learning_rate": 0.00011127383646373393, "loss": 1.3515, "step": 3352 }, { "epoch": 0.4803036814209999, "grad_norm": 0.9200207591056824, "learning_rate": 0.00011122773328639424, "loss": 1.3497, "step": 3353 }, { "epoch": 0.48044692737430167, "grad_norm": 1.4254951477050781, "learning_rate": 0.00011118162769200146, "loss": 1.4599, "step": 3354 }, { "epoch": 0.4805901733276035, "grad_norm": 1.005355954170227, "learning_rate": 0.00011113551969048089, "loss": 1.3563, "step": 3355 }, { "epoch": 0.4807334192809053, "grad_norm": 1.6896955966949463, "learning_rate": 0.00011108940929175853, "loss": 1.52, "step": 3356 }, { "epoch": 0.48087666523420713, "grad_norm": 0.9772291779518127, "learning_rate": 0.00011104329650576073, "loss": 1.4418, "step": 3357 }, { "epoch": 0.481019911187509, "grad_norm": 1.6666598320007324, "learning_rate": 0.00011099718134241451, "loss": 1.3506, "step": 3358 }, { "epoch": 0.48116315714081076, "grad_norm": 0.9738074541091919, "learning_rate": 0.00011095106381164727, "loss": 1.4856, "step": 3359 }, { "epoch": 0.4813064030941126, "grad_norm": 1.0006673336029053, "learning_rate": 0.00011090494392338697, "loss": 1.539, "step": 3360 }, { "epoch": 0.4814496490474144, "grad_norm": 1.3746137619018555, "learning_rate": 0.00011085882168756212, "loss": 1.4118, "step": 3361 }, { "epoch": 0.4815928950007162, "grad_norm": 1.0909067392349243, "learning_rate": 0.00011081269711410167, "loss": 1.4434, "step": 3362 }, { "epoch": 0.48173614095401807, "grad_norm": 1.0215246677398682, "learning_rate": 0.0001107665702129351, "loss": 1.3228, "step": 3363 }, { "epoch": 0.48187938690731985, "grad_norm": 1.223340392112732, "learning_rate": 0.00011072044099399242, "loss": 1.5585, "step": 3364 }, { "epoch": 0.4820226328606217, "grad_norm": 1.0300712585449219, "learning_rate": 0.00011067430946720408, "loss": 1.4543, "step": 3365 }, { "epoch": 0.48216587881392353, "grad_norm": 1.1743842363357544, "learning_rate": 0.00011062817564250103, "loss": 1.3898, "step": 3366 }, { "epoch": 0.4823091247672253, "grad_norm": 1.2403898239135742, "learning_rate": 0.00011058203952981476, "loss": 1.5558, "step": 3367 }, { "epoch": 0.48245237072052716, "grad_norm": 1.311026930809021, "learning_rate": 0.00011053590113907728, "loss": 1.4761, "step": 3368 }, { "epoch": 0.48259561667382894, "grad_norm": 1.1651062965393066, "learning_rate": 0.000110489760480221, "loss": 1.5753, "step": 3369 }, { "epoch": 0.4827388626271308, "grad_norm": 1.0940343141555786, "learning_rate": 0.00011044361756317887, "loss": 1.4922, "step": 3370 }, { "epoch": 0.4828821085804326, "grad_norm": 1.2779818773269653, "learning_rate": 0.0001103974723978843, "loss": 1.3541, "step": 3371 }, { "epoch": 0.4830253545337344, "grad_norm": 1.5476937294006348, "learning_rate": 0.00011035132499427123, "loss": 1.4233, "step": 3372 }, { "epoch": 0.48316860048703625, "grad_norm": 0.9741626381874084, "learning_rate": 0.00011030517536227405, "loss": 1.2097, "step": 3373 }, { "epoch": 0.48331184644033803, "grad_norm": 1.1045200824737549, "learning_rate": 0.00011025902351182765, "loss": 1.3024, "step": 3374 }, { "epoch": 0.4834550923936399, "grad_norm": 1.168770670890808, "learning_rate": 0.00011021286945286731, "loss": 1.5221, "step": 3375 }, { "epoch": 0.4835983383469417, "grad_norm": 1.171493649482727, "learning_rate": 0.00011016671319532894, "loss": 1.6083, "step": 3376 }, { "epoch": 0.4837415843002435, "grad_norm": 1.0298054218292236, "learning_rate": 0.0001101205547491488, "loss": 1.428, "step": 3377 }, { "epoch": 0.48388483025354534, "grad_norm": 1.0101838111877441, "learning_rate": 0.00011007439412426365, "loss": 1.4277, "step": 3378 }, { "epoch": 0.4840280762068472, "grad_norm": 1.11522376537323, "learning_rate": 0.00011002823133061079, "loss": 1.4073, "step": 3379 }, { "epoch": 0.48417132216014896, "grad_norm": 1.1002283096313477, "learning_rate": 0.00010998206637812783, "loss": 1.4094, "step": 3380 }, { "epoch": 0.4843145681134508, "grad_norm": 1.1121855974197388, "learning_rate": 0.00010993589927675305, "loss": 1.424, "step": 3381 }, { "epoch": 0.4844578140667526, "grad_norm": 1.1498883962631226, "learning_rate": 0.00010988973003642499, "loss": 1.4892, "step": 3382 }, { "epoch": 0.48460106002005443, "grad_norm": 1.2971811294555664, "learning_rate": 0.00010984355866708282, "loss": 1.3987, "step": 3383 }, { "epoch": 0.48474430597335627, "grad_norm": 0.994145393371582, "learning_rate": 0.000109797385178666, "loss": 1.5989, "step": 3384 }, { "epoch": 0.48488755192665806, "grad_norm": 1.0696685314178467, "learning_rate": 0.00010975120958111467, "loss": 1.5071, "step": 3385 }, { "epoch": 0.4850307978799599, "grad_norm": 0.9798629879951477, "learning_rate": 0.00010970503188436918, "loss": 1.7237, "step": 3386 }, { "epoch": 0.48517404383326174, "grad_norm": 1.067047357559204, "learning_rate": 0.0001096588520983705, "loss": 1.5126, "step": 3387 }, { "epoch": 0.4853172897865635, "grad_norm": 1.094775915145874, "learning_rate": 0.00010961267023305996, "loss": 1.3751, "step": 3388 }, { "epoch": 0.48546053573986536, "grad_norm": 0.995444118976593, "learning_rate": 0.00010956648629837943, "loss": 1.3402, "step": 3389 }, { "epoch": 0.48560378169316715, "grad_norm": 1.1569347381591797, "learning_rate": 0.00010952030030427114, "loss": 1.4272, "step": 3390 }, { "epoch": 0.485747027646469, "grad_norm": 1.3626402616500854, "learning_rate": 0.00010947411226067777, "loss": 1.46, "step": 3391 }, { "epoch": 0.48589027359977083, "grad_norm": 1.355678677558899, "learning_rate": 0.00010942792217754245, "loss": 1.3429, "step": 3392 }, { "epoch": 0.4860335195530726, "grad_norm": 1.210789680480957, "learning_rate": 0.00010938173006480881, "loss": 1.6074, "step": 3393 }, { "epoch": 0.48617676550637445, "grad_norm": 0.9643422365188599, "learning_rate": 0.00010933553593242085, "loss": 1.4335, "step": 3394 }, { "epoch": 0.48632001145967624, "grad_norm": 1.046739935874939, "learning_rate": 0.00010928933979032305, "loss": 1.4613, "step": 3395 }, { "epoch": 0.4864632574129781, "grad_norm": 1.1703240871429443, "learning_rate": 0.00010924314164846021, "loss": 1.4556, "step": 3396 }, { "epoch": 0.4866065033662799, "grad_norm": 0.9878906011581421, "learning_rate": 0.00010919694151677778, "loss": 1.4051, "step": 3397 }, { "epoch": 0.4867497493195817, "grad_norm": 1.3072961568832397, "learning_rate": 0.00010915073940522136, "loss": 1.2739, "step": 3398 }, { "epoch": 0.48689299527288354, "grad_norm": 1.1261928081512451, "learning_rate": 0.00010910453532373726, "loss": 1.4672, "step": 3399 }, { "epoch": 0.4870362412261854, "grad_norm": 1.211167812347412, "learning_rate": 0.00010905832928227193, "loss": 1.4634, "step": 3400 }, { "epoch": 0.48717948717948717, "grad_norm": 1.0091819763183594, "learning_rate": 0.00010901212129077252, "loss": 1.51, "step": 3401 }, { "epoch": 0.487322733132789, "grad_norm": 1.2708327770233154, "learning_rate": 0.00010896591135918638, "loss": 1.4039, "step": 3402 }, { "epoch": 0.4874659790860908, "grad_norm": 1.1145243644714355, "learning_rate": 0.00010891969949746141, "loss": 1.3797, "step": 3403 }, { "epoch": 0.48760922503939264, "grad_norm": 1.0332392454147339, "learning_rate": 0.0001088734857155459, "loss": 1.4836, "step": 3404 }, { "epoch": 0.4877524709926945, "grad_norm": 1.1336630582809448, "learning_rate": 0.00010882727002338842, "loss": 1.5205, "step": 3405 }, { "epoch": 0.48789571694599626, "grad_norm": 1.1838899850845337, "learning_rate": 0.00010878105243093821, "loss": 1.4271, "step": 3406 }, { "epoch": 0.4880389628992981, "grad_norm": 1.197563648223877, "learning_rate": 0.00010873483294814471, "loss": 1.4667, "step": 3407 }, { "epoch": 0.48818220885259994, "grad_norm": 1.0738539695739746, "learning_rate": 0.00010868861158495782, "loss": 1.3432, "step": 3408 }, { "epoch": 0.4883254548059017, "grad_norm": 1.0344204902648926, "learning_rate": 0.00010864238835132783, "loss": 1.4408, "step": 3409 }, { "epoch": 0.48846870075920357, "grad_norm": 1.133379340171814, "learning_rate": 0.00010859616325720554, "loss": 1.4679, "step": 3410 }, { "epoch": 0.48861194671250535, "grad_norm": 1.1490164995193481, "learning_rate": 0.000108549936312542, "loss": 1.4928, "step": 3411 }, { "epoch": 0.4887551926658072, "grad_norm": 1.095466136932373, "learning_rate": 0.00010850370752728874, "loss": 1.2896, "step": 3412 }, { "epoch": 0.48889843861910903, "grad_norm": 1.038574457168579, "learning_rate": 0.0001084574769113977, "loss": 1.4865, "step": 3413 }, { "epoch": 0.4890416845724108, "grad_norm": 1.2063612937927246, "learning_rate": 0.00010841124447482115, "loss": 1.439, "step": 3414 }, { "epoch": 0.48918493052571266, "grad_norm": 1.1815158128738403, "learning_rate": 0.00010836501022751184, "loss": 1.2932, "step": 3415 }, { "epoch": 0.48932817647901444, "grad_norm": 1.062836766242981, "learning_rate": 0.00010831877417942283, "loss": 1.4812, "step": 3416 }, { "epoch": 0.4894714224323163, "grad_norm": 1.1808329820632935, "learning_rate": 0.00010827253634050758, "loss": 1.2662, "step": 3417 }, { "epoch": 0.4896146683856181, "grad_norm": 1.1382912397384644, "learning_rate": 0.00010822629672071995, "loss": 1.5181, "step": 3418 }, { "epoch": 0.4897579143389199, "grad_norm": 1.048482894897461, "learning_rate": 0.00010818005533001425, "loss": 1.5647, "step": 3419 }, { "epoch": 0.48990116029222175, "grad_norm": 1.0185450315475464, "learning_rate": 0.00010813381217834503, "loss": 1.3724, "step": 3420 }, { "epoch": 0.4900444062455236, "grad_norm": 1.147403359413147, "learning_rate": 0.00010808756727566736, "loss": 1.413, "step": 3421 }, { "epoch": 0.4901876521988254, "grad_norm": 1.2361677885055542, "learning_rate": 0.00010804132063193655, "loss": 1.4378, "step": 3422 }, { "epoch": 0.4903308981521272, "grad_norm": 1.0963706970214844, "learning_rate": 0.00010799507225710843, "loss": 1.468, "step": 3423 }, { "epoch": 0.490474144105429, "grad_norm": 1.054884433746338, "learning_rate": 0.00010794882216113911, "loss": 1.3274, "step": 3424 }, { "epoch": 0.49061739005873084, "grad_norm": 1.1649534702301025, "learning_rate": 0.00010790257035398503, "loss": 1.5202, "step": 3425 }, { "epoch": 0.4907606360120327, "grad_norm": 1.1683424711227417, "learning_rate": 0.00010785631684560316, "loss": 1.4954, "step": 3426 }, { "epoch": 0.49090388196533447, "grad_norm": 0.9950129389762878, "learning_rate": 0.00010781006164595067, "loss": 1.3923, "step": 3427 }, { "epoch": 0.4910471279186363, "grad_norm": 1.3926845788955688, "learning_rate": 0.00010776380476498518, "loss": 1.3753, "step": 3428 }, { "epoch": 0.4911903738719381, "grad_norm": 1.040183186531067, "learning_rate": 0.00010771754621266466, "loss": 1.4767, "step": 3429 }, { "epoch": 0.49133361982523993, "grad_norm": 1.1992692947387695, "learning_rate": 0.00010767128599894738, "loss": 1.4342, "step": 3430 }, { "epoch": 0.49147686577854177, "grad_norm": 1.1832858324050903, "learning_rate": 0.00010762502413379209, "loss": 1.4657, "step": 3431 }, { "epoch": 0.49162011173184356, "grad_norm": 1.2628129720687866, "learning_rate": 0.0001075787606271578, "loss": 1.4361, "step": 3432 }, { "epoch": 0.4917633576851454, "grad_norm": 1.2780241966247559, "learning_rate": 0.0001075324954890039, "loss": 1.5173, "step": 3433 }, { "epoch": 0.49190660363844724, "grad_norm": 1.3416266441345215, "learning_rate": 0.00010748622872929009, "loss": 1.4028, "step": 3434 }, { "epoch": 0.492049849591749, "grad_norm": 1.0095843076705933, "learning_rate": 0.0001074399603579765, "loss": 1.5194, "step": 3435 }, { "epoch": 0.49219309554505086, "grad_norm": 1.0416206121444702, "learning_rate": 0.00010739369038502356, "loss": 1.5138, "step": 3436 }, { "epoch": 0.49233634149835265, "grad_norm": 1.1671234369277954, "learning_rate": 0.00010734741882039204, "loss": 1.4027, "step": 3437 }, { "epoch": 0.4924795874516545, "grad_norm": 1.050166368484497, "learning_rate": 0.00010730114567404305, "loss": 1.5624, "step": 3438 }, { "epoch": 0.49262283340495633, "grad_norm": 1.0378574132919312, "learning_rate": 0.00010725487095593811, "loss": 1.5012, "step": 3439 }, { "epoch": 0.4927660793582581, "grad_norm": 0.9915941953659058, "learning_rate": 0.00010720859467603898, "loss": 1.4393, "step": 3440 }, { "epoch": 0.49290932531155995, "grad_norm": 1.4908393621444702, "learning_rate": 0.00010716231684430779, "loss": 1.4421, "step": 3441 }, { "epoch": 0.4930525712648618, "grad_norm": 1.087257742881775, "learning_rate": 0.00010711603747070702, "loss": 1.1705, "step": 3442 }, { "epoch": 0.4931958172181636, "grad_norm": 1.2150148153305054, "learning_rate": 0.00010706975656519946, "loss": 1.3682, "step": 3443 }, { "epoch": 0.4933390631714654, "grad_norm": 1.1109732389450073, "learning_rate": 0.00010702347413774832, "loss": 1.4389, "step": 3444 }, { "epoch": 0.4934823091247672, "grad_norm": 0.9174085259437561, "learning_rate": 0.00010697719019831695, "loss": 1.5606, "step": 3445 }, { "epoch": 0.49362555507806904, "grad_norm": 1.0272855758666992, "learning_rate": 0.0001069309047568692, "loss": 1.3576, "step": 3446 }, { "epoch": 0.4937688010313709, "grad_norm": 0.8774657845497131, "learning_rate": 0.00010688461782336915, "loss": 1.6035, "step": 3447 }, { "epoch": 0.49391204698467267, "grad_norm": 1.0023385286331177, "learning_rate": 0.00010683832940778127, "loss": 1.2459, "step": 3448 }, { "epoch": 0.4940552929379745, "grad_norm": 0.9568058252334595, "learning_rate": 0.00010679203952007031, "loss": 1.2008, "step": 3449 }, { "epoch": 0.4941985388912763, "grad_norm": 1.1014618873596191, "learning_rate": 0.00010674574817020128, "loss": 1.5294, "step": 3450 }, { "epoch": 0.49434178484457814, "grad_norm": 1.0991486310958862, "learning_rate": 0.00010669945536813963, "loss": 1.307, "step": 3451 }, { "epoch": 0.49448503079788, "grad_norm": 1.1589651107788086, "learning_rate": 0.00010665316112385102, "loss": 1.4186, "step": 3452 }, { "epoch": 0.49462827675118176, "grad_norm": 1.1061111688613892, "learning_rate": 0.00010660686544730145, "loss": 1.3606, "step": 3453 }, { "epoch": 0.4947715227044836, "grad_norm": 1.2987325191497803, "learning_rate": 0.00010656056834845727, "loss": 1.5352, "step": 3454 }, { "epoch": 0.49491476865778544, "grad_norm": 1.111465334892273, "learning_rate": 0.00010651426983728503, "loss": 1.2665, "step": 3455 }, { "epoch": 0.4950580146110872, "grad_norm": 1.312867283821106, "learning_rate": 0.00010646796992375172, "loss": 1.4199, "step": 3456 }, { "epoch": 0.49520126056438907, "grad_norm": 1.2852320671081543, "learning_rate": 0.00010642166861782455, "loss": 1.4076, "step": 3457 }, { "epoch": 0.49534450651769085, "grad_norm": 1.0688996315002441, "learning_rate": 0.00010637536592947103, "loss": 1.5563, "step": 3458 }, { "epoch": 0.4954877524709927, "grad_norm": 1.1064202785491943, "learning_rate": 0.00010632906186865899, "loss": 1.4824, "step": 3459 }, { "epoch": 0.49563099842429453, "grad_norm": 1.1301168203353882, "learning_rate": 0.00010628275644535657, "loss": 1.3545, "step": 3460 }, { "epoch": 0.4957742443775963, "grad_norm": 1.0639129877090454, "learning_rate": 0.00010623644966953212, "loss": 1.4714, "step": 3461 }, { "epoch": 0.49591749033089816, "grad_norm": 0.9906801581382751, "learning_rate": 0.00010619014155115441, "loss": 1.4358, "step": 3462 }, { "epoch": 0.4960607362842, "grad_norm": 1.2321364879608154, "learning_rate": 0.00010614383210019241, "loss": 1.3419, "step": 3463 }, { "epoch": 0.4962039822375018, "grad_norm": 1.1029038429260254, "learning_rate": 0.00010609752132661539, "loss": 1.3464, "step": 3464 }, { "epoch": 0.4963472281908036, "grad_norm": 1.0389331579208374, "learning_rate": 0.00010605120924039293, "loss": 1.4859, "step": 3465 }, { "epoch": 0.4964904741441054, "grad_norm": 1.1372735500335693, "learning_rate": 0.00010600489585149484, "loss": 1.3899, "step": 3466 }, { "epoch": 0.49663372009740725, "grad_norm": 1.1853569746017456, "learning_rate": 0.00010595858116989128, "loss": 1.3445, "step": 3467 }, { "epoch": 0.4967769660507091, "grad_norm": 1.0772411823272705, "learning_rate": 0.00010591226520555264, "loss": 1.2438, "step": 3468 }, { "epoch": 0.4969202120040109, "grad_norm": 1.1271501779556274, "learning_rate": 0.00010586594796844965, "loss": 1.427, "step": 3469 }, { "epoch": 0.4970634579573127, "grad_norm": 1.1908570528030396, "learning_rate": 0.00010581962946855317, "loss": 1.6222, "step": 3470 }, { "epoch": 0.4972067039106145, "grad_norm": 1.278064489364624, "learning_rate": 0.0001057733097158345, "loss": 1.6879, "step": 3471 }, { "epoch": 0.49734994986391634, "grad_norm": 1.2117416858673096, "learning_rate": 0.0001057269887202651, "loss": 1.5391, "step": 3472 }, { "epoch": 0.4974931958172182, "grad_norm": 1.0972899198532104, "learning_rate": 0.00010568066649181676, "loss": 1.3929, "step": 3473 }, { "epoch": 0.49763644177051997, "grad_norm": 1.1661982536315918, "learning_rate": 0.00010563434304046151, "loss": 1.335, "step": 3474 }, { "epoch": 0.4977796877238218, "grad_norm": 1.2792906761169434, "learning_rate": 0.0001055880183761716, "loss": 1.4514, "step": 3475 }, { "epoch": 0.49792293367712365, "grad_norm": 1.221048355102539, "learning_rate": 0.00010554169250891967, "loss": 1.2078, "step": 3476 }, { "epoch": 0.49806617963042543, "grad_norm": 1.480916142463684, "learning_rate": 0.00010549536544867845, "loss": 1.387, "step": 3477 }, { "epoch": 0.4982094255837273, "grad_norm": 1.1148792505264282, "learning_rate": 0.00010544903720542105, "loss": 1.3677, "step": 3478 }, { "epoch": 0.49835267153702906, "grad_norm": 0.9390967488288879, "learning_rate": 0.00010540270778912073, "loss": 1.386, "step": 3479 }, { "epoch": 0.4984959174903309, "grad_norm": 1.0526762008666992, "learning_rate": 0.00010535637720975117, "loss": 1.4109, "step": 3480 }, { "epoch": 0.49863916344363274, "grad_norm": 1.1913859844207764, "learning_rate": 0.00010531004547728613, "loss": 1.4696, "step": 3481 }, { "epoch": 0.4987824093969345, "grad_norm": 0.969770073890686, "learning_rate": 0.00010526371260169972, "loss": 1.5182, "step": 3482 }, { "epoch": 0.49892565535023636, "grad_norm": 1.2126184701919556, "learning_rate": 0.00010521737859296623, "loss": 1.4371, "step": 3483 }, { "epoch": 0.49906890130353815, "grad_norm": 1.1347695589065552, "learning_rate": 0.00010517104346106022, "loss": 1.3374, "step": 3484 }, { "epoch": 0.49921214725684, "grad_norm": 1.031693458557129, "learning_rate": 0.00010512470721595655, "loss": 1.1568, "step": 3485 }, { "epoch": 0.49935539321014183, "grad_norm": 0.972343921661377, "learning_rate": 0.00010507836986763022, "loss": 1.5731, "step": 3486 }, { "epoch": 0.4994986391634436, "grad_norm": 1.110434651374817, "learning_rate": 0.0001050320314260565, "loss": 1.3145, "step": 3487 }, { "epoch": 0.49964188511674545, "grad_norm": 1.256438136100769, "learning_rate": 0.00010498569190121097, "loss": 1.4113, "step": 3488 }, { "epoch": 0.4997851310700473, "grad_norm": 1.2458997964859009, "learning_rate": 0.00010493935130306934, "loss": 1.2825, "step": 3489 }, { "epoch": 0.4999283770233491, "grad_norm": 1.125307321548462, "learning_rate": 0.00010489300964160762, "loss": 1.3764, "step": 3490 }, { "epoch": 0.5000716229766509, "grad_norm": 1.0271503925323486, "learning_rate": 0.00010484666692680201, "loss": 1.3052, "step": 3491 }, { "epoch": 0.5002148689299527, "grad_norm": 1.138046383857727, "learning_rate": 0.00010480032316862891, "loss": 1.3657, "step": 3492 }, { "epoch": 0.5003581148832545, "grad_norm": 1.1509279012680054, "learning_rate": 0.00010475397837706504, "loss": 1.3596, "step": 3493 }, { "epoch": 0.5005013608365564, "grad_norm": 0.9271268844604492, "learning_rate": 0.00010470763256208729, "loss": 1.4179, "step": 3494 }, { "epoch": 0.5006446067898582, "grad_norm": 0.9962475299835205, "learning_rate": 0.00010466128573367273, "loss": 1.5723, "step": 3495 }, { "epoch": 0.50078785274316, "grad_norm": 1.202447533607483, "learning_rate": 0.00010461493790179868, "loss": 1.4853, "step": 3496 }, { "epoch": 0.5009310986964618, "grad_norm": 0.9652479887008667, "learning_rate": 0.00010456858907644271, "loss": 1.3837, "step": 3497 }, { "epoch": 0.5010743446497636, "grad_norm": 0.9198633432388306, "learning_rate": 0.00010452223926758258, "loss": 1.5054, "step": 3498 }, { "epoch": 0.5012175906030655, "grad_norm": 1.013107180595398, "learning_rate": 0.00010447588848519625, "loss": 1.5808, "step": 3499 }, { "epoch": 0.5013608365563673, "grad_norm": 1.0811415910720825, "learning_rate": 0.00010442953673926185, "loss": 1.4595, "step": 3500 }, { "epoch": 0.501504082509669, "grad_norm": 1.044018030166626, "learning_rate": 0.00010438318403975786, "loss": 1.653, "step": 3501 }, { "epoch": 0.5016473284629709, "grad_norm": 1.00082266330719, "learning_rate": 0.00010433683039666278, "loss": 1.311, "step": 3502 }, { "epoch": 0.5017905744162727, "grad_norm": 1.2591431140899658, "learning_rate": 0.00010429047581995546, "loss": 1.4552, "step": 3503 }, { "epoch": 0.5019338203695746, "grad_norm": 1.1139363050460815, "learning_rate": 0.00010424412031961484, "loss": 1.4517, "step": 3504 }, { "epoch": 0.5020770663228764, "grad_norm": 1.0364805459976196, "learning_rate": 0.00010419776390562015, "loss": 1.4931, "step": 3505 }, { "epoch": 0.5022203122761782, "grad_norm": 1.1609759330749512, "learning_rate": 0.00010415140658795077, "loss": 1.3019, "step": 3506 }, { "epoch": 0.50236355822948, "grad_norm": 1.2251722812652588, "learning_rate": 0.00010410504837658627, "loss": 1.435, "step": 3507 }, { "epoch": 0.5025068041827818, "grad_norm": 1.0920590162277222, "learning_rate": 0.00010405868928150648, "loss": 1.3149, "step": 3508 }, { "epoch": 0.5026500501360837, "grad_norm": 1.1456358432769775, "learning_rate": 0.00010401232931269127, "loss": 1.5039, "step": 3509 }, { "epoch": 0.5027932960893855, "grad_norm": 0.9957374334335327, "learning_rate": 0.0001039659684801209, "loss": 1.4093, "step": 3510 }, { "epoch": 0.5029365420426873, "grad_norm": 1.0901576280593872, "learning_rate": 0.00010391960679377563, "loss": 1.2113, "step": 3511 }, { "epoch": 0.5030797879959891, "grad_norm": 1.248495101928711, "learning_rate": 0.00010387324426363605, "loss": 1.4261, "step": 3512 }, { "epoch": 0.5032230339492909, "grad_norm": 1.0495734214782715, "learning_rate": 0.00010382688089968275, "loss": 1.4713, "step": 3513 }, { "epoch": 0.5033662799025928, "grad_norm": 1.0612696409225464, "learning_rate": 0.00010378051671189677, "loss": 1.3575, "step": 3514 }, { "epoch": 0.5035095258558946, "grad_norm": 1.1795153617858887, "learning_rate": 0.00010373415171025904, "loss": 1.5452, "step": 3515 }, { "epoch": 0.5036527718091964, "grad_norm": 0.9429616928100586, "learning_rate": 0.00010368778590475088, "loss": 1.6492, "step": 3516 }, { "epoch": 0.5037960177624982, "grad_norm": 1.1151853799819946, "learning_rate": 0.00010364141930535367, "loss": 1.3705, "step": 3517 }, { "epoch": 0.5039392637158, "grad_norm": 1.1622681617736816, "learning_rate": 0.00010359505192204899, "loss": 1.3159, "step": 3518 }, { "epoch": 0.5040825096691018, "grad_norm": 1.12083101272583, "learning_rate": 0.00010354868376481862, "loss": 1.426, "step": 3519 }, { "epoch": 0.5042257556224037, "grad_norm": 1.304024338722229, "learning_rate": 0.00010350231484364443, "loss": 1.4219, "step": 3520 }, { "epoch": 0.5043690015757055, "grad_norm": 1.0422699451446533, "learning_rate": 0.00010345594516850851, "loss": 1.4692, "step": 3521 }, { "epoch": 0.5045122475290073, "grad_norm": 0.9824684858322144, "learning_rate": 0.00010340957474939312, "loss": 1.4901, "step": 3522 }, { "epoch": 0.5046554934823091, "grad_norm": 1.0426971912384033, "learning_rate": 0.00010336320359628067, "loss": 1.1901, "step": 3523 }, { "epoch": 0.5047987394356109, "grad_norm": 1.111275315284729, "learning_rate": 0.00010331683171915374, "loss": 1.4754, "step": 3524 }, { "epoch": 0.5049419853889128, "grad_norm": 1.085099458694458, "learning_rate": 0.00010327045912799496, "loss": 1.24, "step": 3525 }, { "epoch": 0.5050852313422146, "grad_norm": 1.129228115081787, "learning_rate": 0.00010322408583278732, "loss": 1.3735, "step": 3526 }, { "epoch": 0.5052284772955165, "grad_norm": 1.2121059894561768, "learning_rate": 0.00010317771184351375, "loss": 1.4364, "step": 3527 }, { "epoch": 0.5053717232488182, "grad_norm": 1.0406516790390015, "learning_rate": 0.00010313133717015749, "loss": 1.4341, "step": 3528 }, { "epoch": 0.50551496920212, "grad_norm": 1.0965200662612915, "learning_rate": 0.00010308496182270176, "loss": 1.4157, "step": 3529 }, { "epoch": 0.5056582151554219, "grad_norm": 1.0915249586105347, "learning_rate": 0.00010303858581113015, "loss": 1.3564, "step": 3530 }, { "epoch": 0.5058014611087237, "grad_norm": 1.229333519935608, "learning_rate": 0.00010299220914542618, "loss": 1.2827, "step": 3531 }, { "epoch": 0.5059447070620255, "grad_norm": 1.2443373203277588, "learning_rate": 0.00010294583183557362, "loss": 1.3208, "step": 3532 }, { "epoch": 0.5060879530153273, "grad_norm": 1.2086431980133057, "learning_rate": 0.00010289945389155643, "loss": 1.4478, "step": 3533 }, { "epoch": 0.5062311989686291, "grad_norm": 1.1349196434020996, "learning_rate": 0.00010285307532335846, "loss": 1.4209, "step": 3534 }, { "epoch": 0.506374444921931, "grad_norm": 1.244154930114746, "learning_rate": 0.00010280669614096405, "loss": 1.6094, "step": 3535 }, { "epoch": 0.5065176908752328, "grad_norm": 1.1485425233840942, "learning_rate": 0.00010276031635435741, "loss": 1.371, "step": 3536 }, { "epoch": 0.5066609368285346, "grad_norm": 1.0762343406677246, "learning_rate": 0.00010271393597352297, "loss": 1.5967, "step": 3537 }, { "epoch": 0.5068041827818364, "grad_norm": 1.2191520929336548, "learning_rate": 0.00010266755500844523, "loss": 1.4247, "step": 3538 }, { "epoch": 0.5069474287351382, "grad_norm": 1.1406378746032715, "learning_rate": 0.00010262117346910896, "loss": 1.43, "step": 3539 }, { "epoch": 0.50709067468844, "grad_norm": 1.1244142055511475, "learning_rate": 0.00010257479136549889, "loss": 1.3693, "step": 3540 }, { "epoch": 0.5072339206417419, "grad_norm": 1.1609869003295898, "learning_rate": 0.00010252840870759993, "loss": 1.4445, "step": 3541 }, { "epoch": 0.5073771665950437, "grad_norm": 1.0453346967697144, "learning_rate": 0.00010248202550539716, "loss": 1.5308, "step": 3542 }, { "epoch": 0.5075204125483455, "grad_norm": 1.098109245300293, "learning_rate": 0.0001024356417688757, "loss": 1.3971, "step": 3543 }, { "epoch": 0.5076636585016473, "grad_norm": 0.9849418997764587, "learning_rate": 0.00010238925750802089, "loss": 1.347, "step": 3544 }, { "epoch": 0.5078069044549491, "grad_norm": 1.385166049003601, "learning_rate": 0.00010234287273281802, "loss": 1.3742, "step": 3545 }, { "epoch": 0.507950150408251, "grad_norm": 1.1497560739517212, "learning_rate": 0.00010229648745325265, "loss": 1.285, "step": 3546 }, { "epoch": 0.5080933963615528, "grad_norm": 1.158395767211914, "learning_rate": 0.00010225010167931035, "loss": 1.5339, "step": 3547 }, { "epoch": 0.5082366423148547, "grad_norm": 0.9182134866714478, "learning_rate": 0.00010220371542097682, "loss": 1.3372, "step": 3548 }, { "epoch": 0.5083798882681564, "grad_norm": 1.1867213249206543, "learning_rate": 0.00010215732868823795, "loss": 1.444, "step": 3549 }, { "epoch": 0.5085231342214582, "grad_norm": 1.001237154006958, "learning_rate": 0.00010211094149107954, "loss": 1.4755, "step": 3550 }, { "epoch": 0.5086663801747601, "grad_norm": 1.096407413482666, "learning_rate": 0.0001020645538394877, "loss": 1.4568, "step": 3551 }, { "epoch": 0.5088096261280619, "grad_norm": 0.9367827773094177, "learning_rate": 0.00010201816574344849, "loss": 1.4318, "step": 3552 }, { "epoch": 0.5089528720813637, "grad_norm": 1.0399775505065918, "learning_rate": 0.00010197177721294818, "loss": 1.4125, "step": 3553 }, { "epoch": 0.5090961180346655, "grad_norm": 1.0926892757415771, "learning_rate": 0.00010192538825797296, "loss": 1.3252, "step": 3554 }, { "epoch": 0.5092393639879673, "grad_norm": 1.0777487754821777, "learning_rate": 0.00010187899888850933, "loss": 1.4171, "step": 3555 }, { "epoch": 0.5093826099412692, "grad_norm": 1.1289820671081543, "learning_rate": 0.00010183260911454373, "loss": 1.3153, "step": 3556 }, { "epoch": 0.509525855894571, "grad_norm": 1.0153565406799316, "learning_rate": 0.00010178621894606275, "loss": 1.5408, "step": 3557 }, { "epoch": 0.5096691018478728, "grad_norm": 1.171958565711975, "learning_rate": 0.00010173982839305304, "loss": 1.3925, "step": 3558 }, { "epoch": 0.5098123478011746, "grad_norm": 1.178712248802185, "learning_rate": 0.0001016934374655013, "loss": 1.4596, "step": 3559 }, { "epoch": 0.5099555937544764, "grad_norm": 0.9625037312507629, "learning_rate": 0.00010164704617339442, "loss": 1.5827, "step": 3560 }, { "epoch": 0.5100988397077783, "grad_norm": 1.018351435661316, "learning_rate": 0.00010160065452671923, "loss": 1.4315, "step": 3561 }, { "epoch": 0.5102420856610801, "grad_norm": 1.138958215713501, "learning_rate": 0.00010155426253546274, "loss": 1.4114, "step": 3562 }, { "epoch": 0.5103853316143819, "grad_norm": 1.1471924781799316, "learning_rate": 0.00010150787020961197, "loss": 1.4659, "step": 3563 }, { "epoch": 0.5105285775676837, "grad_norm": 1.1197329759597778, "learning_rate": 0.00010146147755915407, "loss": 1.5327, "step": 3564 }, { "epoch": 0.5106718235209855, "grad_norm": 1.0745993852615356, "learning_rate": 0.00010141508459407623, "loss": 1.5338, "step": 3565 }, { "epoch": 0.5108150694742873, "grad_norm": 1.2184962034225464, "learning_rate": 0.00010136869132436568, "loss": 1.2889, "step": 3566 }, { "epoch": 0.5109583154275892, "grad_norm": 0.8949021100997925, "learning_rate": 0.00010132229776000974, "loss": 1.428, "step": 3567 }, { "epoch": 0.511101561380891, "grad_norm": 1.0946015119552612, "learning_rate": 0.00010127590391099584, "loss": 1.4546, "step": 3568 }, { "epoch": 0.5112448073341929, "grad_norm": 0.9700352549552917, "learning_rate": 0.00010122950978731141, "loss": 1.4379, "step": 3569 }, { "epoch": 0.5113880532874946, "grad_norm": 1.1097909212112427, "learning_rate": 0.00010118311539894394, "loss": 1.5445, "step": 3570 }, { "epoch": 0.5115312992407964, "grad_norm": 1.1244127750396729, "learning_rate": 0.00010113672075588099, "loss": 1.4041, "step": 3571 }, { "epoch": 0.5116745451940983, "grad_norm": 1.1424570083618164, "learning_rate": 0.0001010903258681102, "loss": 1.3175, "step": 3572 }, { "epoch": 0.5118177911474001, "grad_norm": 1.2052518129348755, "learning_rate": 0.00010104393074561924, "loss": 1.4077, "step": 3573 }, { "epoch": 0.511961037100702, "grad_norm": 1.0825068950653076, "learning_rate": 0.0001009975353983958, "loss": 1.4789, "step": 3574 }, { "epoch": 0.5121042830540037, "grad_norm": 0.9709814190864563, "learning_rate": 0.0001009511398364277, "loss": 1.3619, "step": 3575 }, { "epoch": 0.5122475290073055, "grad_norm": 1.0578655004501343, "learning_rate": 0.00010090474406970271, "loss": 1.3785, "step": 3576 }, { "epoch": 0.5123907749606074, "grad_norm": 0.9182901382446289, "learning_rate": 0.00010085834810820871, "loss": 1.5544, "step": 3577 }, { "epoch": 0.5125340209139092, "grad_norm": 1.026823878288269, "learning_rate": 0.00010081195196193362, "loss": 1.6281, "step": 3578 }, { "epoch": 0.512677266867211, "grad_norm": 1.0636945962905884, "learning_rate": 0.00010076555564086534, "loss": 1.6485, "step": 3579 }, { "epoch": 0.5128205128205128, "grad_norm": 0.8647247552871704, "learning_rate": 0.0001007191591549919, "loss": 1.4047, "step": 3580 }, { "epoch": 0.5129637587738146, "grad_norm": 1.25603449344635, "learning_rate": 0.0001006727625143013, "loss": 1.3918, "step": 3581 }, { "epoch": 0.5131070047271165, "grad_norm": 1.4147133827209473, "learning_rate": 0.00010062636572878155, "loss": 1.4178, "step": 3582 }, { "epoch": 0.5132502506804183, "grad_norm": 0.952659010887146, "learning_rate": 0.00010057996880842078, "loss": 1.4683, "step": 3583 }, { "epoch": 0.5133934966337201, "grad_norm": 1.19082772731781, "learning_rate": 0.00010053357176320703, "loss": 1.34, "step": 3584 }, { "epoch": 0.5135367425870219, "grad_norm": 1.41098153591156, "learning_rate": 0.00010048717460312855, "loss": 1.3464, "step": 3585 }, { "epoch": 0.5136799885403237, "grad_norm": 0.9611150622367859, "learning_rate": 0.00010044077733817341, "loss": 1.5962, "step": 3586 }, { "epoch": 0.5138232344936255, "grad_norm": 1.1712177991867065, "learning_rate": 0.00010039437997832984, "loss": 1.3965, "step": 3587 }, { "epoch": 0.5139664804469274, "grad_norm": 1.274287223815918, "learning_rate": 0.00010034798253358595, "loss": 1.4191, "step": 3588 }, { "epoch": 0.5141097264002292, "grad_norm": 1.1152019500732422, "learning_rate": 0.00010030158501393008, "loss": 1.5161, "step": 3589 }, { "epoch": 0.5142529723535311, "grad_norm": 1.252150535583496, "learning_rate": 0.00010025518742935041, "loss": 1.2252, "step": 3590 }, { "epoch": 0.5143962183068328, "grad_norm": 0.9764754176139832, "learning_rate": 0.00010020878978983522, "loss": 1.5607, "step": 3591 }, { "epoch": 0.5145394642601346, "grad_norm": 1.1876507997512817, "learning_rate": 0.00010016239210537273, "loss": 1.5723, "step": 3592 }, { "epoch": 0.5146827102134365, "grad_norm": 1.0151089429855347, "learning_rate": 0.00010011599438595123, "loss": 1.5332, "step": 3593 }, { "epoch": 0.5148259561667383, "grad_norm": 0.9354745149612427, "learning_rate": 0.00010006959664155904, "loss": 1.4294, "step": 3594 }, { "epoch": 0.5149692021200402, "grad_norm": 0.9792630672454834, "learning_rate": 0.0001000231988821844, "loss": 1.617, "step": 3595 }, { "epoch": 0.5151124480733419, "grad_norm": 1.1594178676605225, "learning_rate": 9.997680111781562e-05, "loss": 1.5172, "step": 3596 }, { "epoch": 0.5152556940266437, "grad_norm": 1.0626468658447266, "learning_rate": 9.9930403358441e-05, "loss": 1.3427, "step": 3597 }, { "epoch": 0.5153989399799456, "grad_norm": 1.099827527999878, "learning_rate": 9.98840056140488e-05, "loss": 1.2924, "step": 3598 }, { "epoch": 0.5155421859332474, "grad_norm": 1.1610121726989746, "learning_rate": 9.983760789462728e-05, "loss": 1.4958, "step": 3599 }, { "epoch": 0.5156854318865493, "grad_norm": 0.9326909184455872, "learning_rate": 9.979121021016482e-05, "loss": 1.2922, "step": 3600 }, { "epoch": 0.515828677839851, "grad_norm": 1.0037531852722168, "learning_rate": 9.97448125706496e-05, "loss": 1.5875, "step": 3601 }, { "epoch": 0.5159719237931528, "grad_norm": 1.0887730121612549, "learning_rate": 9.969841498606993e-05, "loss": 1.5401, "step": 3602 }, { "epoch": 0.5161151697464547, "grad_norm": 0.9652317762374878, "learning_rate": 9.965201746641407e-05, "loss": 1.386, "step": 3603 }, { "epoch": 0.5162584156997565, "grad_norm": 1.1576441526412964, "learning_rate": 9.96056200216702e-05, "loss": 1.4251, "step": 3604 }, { "epoch": 0.5164016616530583, "grad_norm": 0.9793096780776978, "learning_rate": 9.955922266182664e-05, "loss": 1.473, "step": 3605 }, { "epoch": 0.5165449076063601, "grad_norm": 1.2000478506088257, "learning_rate": 9.951282539687146e-05, "loss": 1.2743, "step": 3606 }, { "epoch": 0.5166881535596619, "grad_norm": 1.299761176109314, "learning_rate": 9.946642823679295e-05, "loss": 1.3419, "step": 3607 }, { "epoch": 0.5168313995129638, "grad_norm": 1.00190269947052, "learning_rate": 9.942003119157926e-05, "loss": 1.3603, "step": 3608 }, { "epoch": 0.5169746454662656, "grad_norm": 1.1703273057937622, "learning_rate": 9.937363427121847e-05, "loss": 1.5272, "step": 3609 }, { "epoch": 0.5171178914195674, "grad_norm": 0.9897456169128418, "learning_rate": 9.932723748569876e-05, "loss": 1.3854, "step": 3610 }, { "epoch": 0.5172611373728692, "grad_norm": 1.143493890762329, "learning_rate": 9.928084084500812e-05, "loss": 1.4402, "step": 3611 }, { "epoch": 0.517404383326171, "grad_norm": 0.9592913389205933, "learning_rate": 9.923444435913466e-05, "loss": 1.2905, "step": 3612 }, { "epoch": 0.5175476292794728, "grad_norm": 1.0832074880599976, "learning_rate": 9.918804803806642e-05, "loss": 1.3416, "step": 3613 }, { "epoch": 0.5176908752327747, "grad_norm": 1.1010894775390625, "learning_rate": 9.914165189179131e-05, "loss": 1.2985, "step": 3614 }, { "epoch": 0.5178341211860765, "grad_norm": 1.1934311389923096, "learning_rate": 9.90952559302973e-05, "loss": 1.6325, "step": 3615 }, { "epoch": 0.5179773671393784, "grad_norm": 1.0829458236694336, "learning_rate": 9.904886016357233e-05, "loss": 1.5473, "step": 3616 }, { "epoch": 0.5181206130926801, "grad_norm": 1.0482755899429321, "learning_rate": 9.900246460160422e-05, "loss": 1.5872, "step": 3617 }, { "epoch": 0.5182638590459819, "grad_norm": 1.1596132516860962, "learning_rate": 9.89560692543808e-05, "loss": 1.4246, "step": 3618 }, { "epoch": 0.5184071049992838, "grad_norm": 1.3710565567016602, "learning_rate": 9.890967413188983e-05, "loss": 1.4766, "step": 3619 }, { "epoch": 0.5185503509525856, "grad_norm": 0.9798039793968201, "learning_rate": 9.886327924411902e-05, "loss": 1.4038, "step": 3620 }, { "epoch": 0.5186935969058875, "grad_norm": 1.2228951454162598, "learning_rate": 9.88168846010561e-05, "loss": 1.6637, "step": 3621 }, { "epoch": 0.5188368428591892, "grad_norm": 1.1269609928131104, "learning_rate": 9.87704902126886e-05, "loss": 1.3064, "step": 3622 }, { "epoch": 0.518980088812491, "grad_norm": 1.1092020273208618, "learning_rate": 9.872409608900416e-05, "loss": 1.3595, "step": 3623 }, { "epoch": 0.5191233347657929, "grad_norm": 1.135745644569397, "learning_rate": 9.867770223999028e-05, "loss": 1.2588, "step": 3624 }, { "epoch": 0.5192665807190947, "grad_norm": 0.9903700947761536, "learning_rate": 9.863130867563435e-05, "loss": 1.5104, "step": 3625 }, { "epoch": 0.5194098266723965, "grad_norm": 1.039293885231018, "learning_rate": 9.858491540592382e-05, "loss": 1.5577, "step": 3626 }, { "epoch": 0.5195530726256983, "grad_norm": 0.9378083944320679, "learning_rate": 9.853852244084594e-05, "loss": 1.4388, "step": 3627 }, { "epoch": 0.5196963185790001, "grad_norm": 1.2949731349945068, "learning_rate": 9.849212979038804e-05, "loss": 1.4602, "step": 3628 }, { "epoch": 0.519839564532302, "grad_norm": 1.0818771123886108, "learning_rate": 9.84457374645373e-05, "loss": 1.3401, "step": 3629 }, { "epoch": 0.5199828104856038, "grad_norm": 1.213658094406128, "learning_rate": 9.83993454732808e-05, "loss": 1.4178, "step": 3630 }, { "epoch": 0.5201260564389056, "grad_norm": 0.924899697303772, "learning_rate": 9.835295382660559e-05, "loss": 1.397, "step": 3631 }, { "epoch": 0.5202693023922074, "grad_norm": 1.1707476377487183, "learning_rate": 9.830656253449872e-05, "loss": 1.4536, "step": 3632 }, { "epoch": 0.5204125483455092, "grad_norm": 1.1964712142944336, "learning_rate": 9.826017160694697e-05, "loss": 1.3359, "step": 3633 }, { "epoch": 0.520555794298811, "grad_norm": 1.0140048265457153, "learning_rate": 9.821378105393727e-05, "loss": 1.4623, "step": 3634 }, { "epoch": 0.5206990402521129, "grad_norm": 0.9919286966323853, "learning_rate": 9.816739088545628e-05, "loss": 1.4354, "step": 3635 }, { "epoch": 0.5208422862054147, "grad_norm": 1.0083461999893188, "learning_rate": 9.812100111149068e-05, "loss": 1.5466, "step": 3636 }, { "epoch": 0.5209855321587166, "grad_norm": 1.0954113006591797, "learning_rate": 9.807461174202707e-05, "loss": 1.3662, "step": 3637 }, { "epoch": 0.5211287781120183, "grad_norm": 1.0055007934570312, "learning_rate": 9.802822278705186e-05, "loss": 1.3512, "step": 3638 }, { "epoch": 0.5212720240653201, "grad_norm": 1.1127763986587524, "learning_rate": 9.798183425655156e-05, "loss": 1.6607, "step": 3639 }, { "epoch": 0.521415270018622, "grad_norm": 1.178781509399414, "learning_rate": 9.793544616051232e-05, "loss": 1.4596, "step": 3640 }, { "epoch": 0.5215585159719238, "grad_norm": 0.979458212852478, "learning_rate": 9.788905850892047e-05, "loss": 1.5598, "step": 3641 }, { "epoch": 0.5217017619252257, "grad_norm": 1.0697314739227295, "learning_rate": 9.78426713117621e-05, "loss": 1.3801, "step": 3642 }, { "epoch": 0.5218450078785274, "grad_norm": 1.142638921737671, "learning_rate": 9.779628457902319e-05, "loss": 1.3785, "step": 3643 }, { "epoch": 0.5219882538318292, "grad_norm": 1.3159234523773193, "learning_rate": 9.774989832068966e-05, "loss": 1.4439, "step": 3644 }, { "epoch": 0.5221314997851311, "grad_norm": 1.4077343940734863, "learning_rate": 9.770351254674738e-05, "loss": 1.4663, "step": 3645 }, { "epoch": 0.5222747457384329, "grad_norm": 1.0821431875228882, "learning_rate": 9.765712726718199e-05, "loss": 1.3828, "step": 3646 }, { "epoch": 0.5224179916917348, "grad_norm": 0.982258141040802, "learning_rate": 9.761074249197915e-05, "loss": 1.4064, "step": 3647 }, { "epoch": 0.5225612376450365, "grad_norm": 1.0557477474212646, "learning_rate": 9.75643582311243e-05, "loss": 1.4764, "step": 3648 }, { "epoch": 0.5227044835983383, "grad_norm": 1.007712721824646, "learning_rate": 9.751797449460285e-05, "loss": 1.5898, "step": 3649 }, { "epoch": 0.5228477295516402, "grad_norm": 0.9973738789558411, "learning_rate": 9.74715912924001e-05, "loss": 1.366, "step": 3650 }, { "epoch": 0.522990975504942, "grad_norm": 1.09053635597229, "learning_rate": 9.742520863450115e-05, "loss": 1.3529, "step": 3651 }, { "epoch": 0.5231342214582438, "grad_norm": 1.0612603425979614, "learning_rate": 9.737882653089107e-05, "loss": 1.3934, "step": 3652 }, { "epoch": 0.5232774674115456, "grad_norm": 1.039867877960205, "learning_rate": 9.733244499155479e-05, "loss": 1.5296, "step": 3653 }, { "epoch": 0.5234207133648474, "grad_norm": 1.1256277561187744, "learning_rate": 9.728606402647705e-05, "loss": 1.485, "step": 3654 }, { "epoch": 0.5235639593181493, "grad_norm": 1.45452880859375, "learning_rate": 9.723968364564264e-05, "loss": 1.5192, "step": 3655 }, { "epoch": 0.5237072052714511, "grad_norm": 1.5885354280471802, "learning_rate": 9.719330385903596e-05, "loss": 1.5644, "step": 3656 }, { "epoch": 0.5238504512247529, "grad_norm": 1.1291733980178833, "learning_rate": 9.714692467664152e-05, "loss": 1.3879, "step": 3657 }, { "epoch": 0.5239936971780548, "grad_norm": 1.141465663909912, "learning_rate": 9.710054610844364e-05, "loss": 1.6012, "step": 3658 }, { "epoch": 0.5241369431313565, "grad_norm": 1.2138019800186157, "learning_rate": 9.705416816442639e-05, "loss": 1.4978, "step": 3659 }, { "epoch": 0.5242801890846583, "grad_norm": 0.9463986158370972, "learning_rate": 9.700779085457386e-05, "loss": 1.4724, "step": 3660 }, { "epoch": 0.5244234350379602, "grad_norm": 1.2441587448120117, "learning_rate": 9.696141418886987e-05, "loss": 1.3979, "step": 3661 }, { "epoch": 0.524566680991262, "grad_norm": 1.083832859992981, "learning_rate": 9.691503817729824e-05, "loss": 1.5718, "step": 3662 }, { "epoch": 0.5247099269445639, "grad_norm": 1.232864499092102, "learning_rate": 9.686866282984256e-05, "loss": 1.2951, "step": 3663 }, { "epoch": 0.5248531728978656, "grad_norm": 0.9618181586265564, "learning_rate": 9.682228815648628e-05, "loss": 1.3801, "step": 3664 }, { "epoch": 0.5249964188511674, "grad_norm": 1.0909799337387085, "learning_rate": 9.67759141672127e-05, "loss": 1.3373, "step": 3665 }, { "epoch": 0.5251396648044693, "grad_norm": 0.9654986262321472, "learning_rate": 9.672954087200506e-05, "loss": 1.3922, "step": 3666 }, { "epoch": 0.5252829107577711, "grad_norm": 0.9920533895492554, "learning_rate": 9.668316828084628e-05, "loss": 1.389, "step": 3667 }, { "epoch": 0.525426156711073, "grad_norm": 1.0261787176132202, "learning_rate": 9.663679640371934e-05, "loss": 1.3685, "step": 3668 }, { "epoch": 0.5255694026643747, "grad_norm": 0.9672529101371765, "learning_rate": 9.65904252506069e-05, "loss": 1.5902, "step": 3669 }, { "epoch": 0.5257126486176765, "grad_norm": 1.0289409160614014, "learning_rate": 9.654405483149151e-05, "loss": 1.6726, "step": 3670 }, { "epoch": 0.5258558945709784, "grad_norm": 0.9697561860084534, "learning_rate": 9.649768515635562e-05, "loss": 1.4761, "step": 3671 }, { "epoch": 0.5259991405242802, "grad_norm": 1.0699905157089233, "learning_rate": 9.64513162351814e-05, "loss": 1.3822, "step": 3672 }, { "epoch": 0.526142386477582, "grad_norm": 1.0676175355911255, "learning_rate": 9.6404948077951e-05, "loss": 1.6505, "step": 3673 }, { "epoch": 0.5262856324308838, "grad_norm": 1.2039189338684082, "learning_rate": 9.635858069464635e-05, "loss": 1.5004, "step": 3674 }, { "epoch": 0.5264288783841856, "grad_norm": 1.1918784379959106, "learning_rate": 9.631221409524913e-05, "loss": 1.3343, "step": 3675 }, { "epoch": 0.5265721243374875, "grad_norm": 0.9595719575881958, "learning_rate": 9.6265848289741e-05, "loss": 1.4458, "step": 3676 }, { "epoch": 0.5267153702907893, "grad_norm": 1.1812224388122559, "learning_rate": 9.621948328810326e-05, "loss": 1.4513, "step": 3677 }, { "epoch": 0.5268586162440911, "grad_norm": 1.1946989297866821, "learning_rate": 9.617311910031724e-05, "loss": 1.4736, "step": 3678 }, { "epoch": 0.527001862197393, "grad_norm": 1.0645179748535156, "learning_rate": 9.6126755736364e-05, "loss": 1.4159, "step": 3679 }, { "epoch": 0.5271451081506947, "grad_norm": 1.4286433458328247, "learning_rate": 9.608039320622439e-05, "loss": 1.5471, "step": 3680 }, { "epoch": 0.5272883541039965, "grad_norm": 1.2019009590148926, "learning_rate": 9.60340315198791e-05, "loss": 1.5456, "step": 3681 }, { "epoch": 0.5274316000572984, "grad_norm": 1.0661293268203735, "learning_rate": 9.598767068730874e-05, "loss": 1.4934, "step": 3682 }, { "epoch": 0.5275748460106002, "grad_norm": 1.045753836631775, "learning_rate": 9.594131071849353e-05, "loss": 1.5277, "step": 3683 }, { "epoch": 0.5277180919639021, "grad_norm": 0.8283714652061462, "learning_rate": 9.589495162341374e-05, "loss": 1.4843, "step": 3684 }, { "epoch": 0.5278613379172038, "grad_norm": 1.2093822956085205, "learning_rate": 9.584859341204924e-05, "loss": 1.3494, "step": 3685 }, { "epoch": 0.5280045838705056, "grad_norm": 1.0161635875701904, "learning_rate": 9.580223609437986e-05, "loss": 1.4181, "step": 3686 }, { "epoch": 0.5281478298238075, "grad_norm": 1.0097510814666748, "learning_rate": 9.57558796803852e-05, "loss": 1.3536, "step": 3687 }, { "epoch": 0.5282910757771093, "grad_norm": 0.9038384556770325, "learning_rate": 9.570952418004455e-05, "loss": 1.492, "step": 3688 }, { "epoch": 0.5284343217304112, "grad_norm": 0.9352546334266663, "learning_rate": 9.566316960333726e-05, "loss": 1.4586, "step": 3689 }, { "epoch": 0.5285775676837129, "grad_norm": 1.070955514907837, "learning_rate": 9.561681596024215e-05, "loss": 1.5079, "step": 3690 }, { "epoch": 0.5287208136370147, "grad_norm": 1.0143582820892334, "learning_rate": 9.557046326073814e-05, "loss": 1.4311, "step": 3691 }, { "epoch": 0.5288640595903166, "grad_norm": 1.1470767259597778, "learning_rate": 9.552411151480378e-05, "loss": 1.5105, "step": 3692 }, { "epoch": 0.5290073055436184, "grad_norm": 1.3744410276412964, "learning_rate": 9.547776073241744e-05, "loss": 1.5286, "step": 3693 }, { "epoch": 0.5291505514969203, "grad_norm": 1.1075444221496582, "learning_rate": 9.543141092355727e-05, "loss": 1.2798, "step": 3694 }, { "epoch": 0.529293797450222, "grad_norm": 0.998053252696991, "learning_rate": 9.538506209820133e-05, "loss": 1.3604, "step": 3695 }, { "epoch": 0.5294370434035238, "grad_norm": 0.9933651685714722, "learning_rate": 9.53387142663273e-05, "loss": 1.4591, "step": 3696 }, { "epoch": 0.5295802893568257, "grad_norm": 1.2047055959701538, "learning_rate": 9.529236743791276e-05, "loss": 1.4494, "step": 3697 }, { "epoch": 0.5297235353101275, "grad_norm": 1.3577064275741577, "learning_rate": 9.524602162293497e-05, "loss": 1.6059, "step": 3698 }, { "epoch": 0.5298667812634293, "grad_norm": 0.9883847832679749, "learning_rate": 9.519967683137108e-05, "loss": 1.4963, "step": 3699 }, { "epoch": 0.5300100272167311, "grad_norm": 1.151240587234497, "learning_rate": 9.515333307319805e-05, "loss": 1.4894, "step": 3700 }, { "epoch": 0.5301532731700329, "grad_norm": 1.2949846982955933, "learning_rate": 9.51069903583924e-05, "loss": 1.3232, "step": 3701 }, { "epoch": 0.5302965191233348, "grad_norm": 1.2442485094070435, "learning_rate": 9.506064869693066e-05, "loss": 1.4509, "step": 3702 }, { "epoch": 0.5304397650766366, "grad_norm": 1.0899699926376343, "learning_rate": 9.501430809878906e-05, "loss": 1.394, "step": 3703 }, { "epoch": 0.5305830110299384, "grad_norm": 1.0936716794967651, "learning_rate": 9.496796857394352e-05, "loss": 1.5165, "step": 3704 }, { "epoch": 0.5307262569832403, "grad_norm": 0.9987739324569702, "learning_rate": 9.492163013236983e-05, "loss": 1.4658, "step": 3705 }, { "epoch": 0.530869502936542, "grad_norm": 1.4368233680725098, "learning_rate": 9.487529278404347e-05, "loss": 1.3469, "step": 3706 }, { "epoch": 0.5310127488898438, "grad_norm": 1.1306756734848022, "learning_rate": 9.482895653893978e-05, "loss": 1.3995, "step": 3707 }, { "epoch": 0.5311559948431457, "grad_norm": 0.9543665647506714, "learning_rate": 9.478262140703381e-05, "loss": 1.544, "step": 3708 }, { "epoch": 0.5312992407964475, "grad_norm": 1.261277198791504, "learning_rate": 9.473628739830032e-05, "loss": 1.2997, "step": 3709 }, { "epoch": 0.5314424867497494, "grad_norm": 1.018303394317627, "learning_rate": 9.468995452271387e-05, "loss": 1.4486, "step": 3710 }, { "epoch": 0.5315857327030511, "grad_norm": 1.0480808019638062, "learning_rate": 9.464362279024884e-05, "loss": 1.5603, "step": 3711 }, { "epoch": 0.5317289786563529, "grad_norm": 1.0268921852111816, "learning_rate": 9.459729221087926e-05, "loss": 1.504, "step": 3712 }, { "epoch": 0.5318722246096548, "grad_norm": 1.113730788230896, "learning_rate": 9.4550962794579e-05, "loss": 1.4887, "step": 3713 }, { "epoch": 0.5320154705629566, "grad_norm": 1.1313713788986206, "learning_rate": 9.450463455132158e-05, "loss": 1.3447, "step": 3714 }, { "epoch": 0.5321587165162585, "grad_norm": 0.9995348453521729, "learning_rate": 9.445830749108034e-05, "loss": 1.3711, "step": 3715 }, { "epoch": 0.5323019624695602, "grad_norm": 1.106647253036499, "learning_rate": 9.44119816238284e-05, "loss": 1.2895, "step": 3716 }, { "epoch": 0.532445208422862, "grad_norm": 1.117048978805542, "learning_rate": 9.43656569595385e-05, "loss": 1.147, "step": 3717 }, { "epoch": 0.5325884543761639, "grad_norm": 1.1832622289657593, "learning_rate": 9.431933350818326e-05, "loss": 1.5899, "step": 3718 }, { "epoch": 0.5327317003294657, "grad_norm": 0.983793318271637, "learning_rate": 9.427301127973491e-05, "loss": 1.5206, "step": 3719 }, { "epoch": 0.5328749462827675, "grad_norm": 1.0098230838775635, "learning_rate": 9.422669028416552e-05, "loss": 1.3397, "step": 3720 }, { "epoch": 0.5330181922360693, "grad_norm": 1.0970121622085571, "learning_rate": 9.418037053144687e-05, "loss": 1.4693, "step": 3721 }, { "epoch": 0.5331614381893711, "grad_norm": 1.0659592151641846, "learning_rate": 9.413405203155038e-05, "loss": 1.4068, "step": 3722 }, { "epoch": 0.533304684142673, "grad_norm": 1.0793672800064087, "learning_rate": 9.408773479444736e-05, "loss": 1.4173, "step": 3723 }, { "epoch": 0.5334479300959748, "grad_norm": 1.0774383544921875, "learning_rate": 9.404141883010874e-05, "loss": 1.3618, "step": 3724 }, { "epoch": 0.5335911760492766, "grad_norm": 1.050866961479187, "learning_rate": 9.399510414850518e-05, "loss": 1.3432, "step": 3725 }, { "epoch": 0.5337344220025785, "grad_norm": 1.3098955154418945, "learning_rate": 9.394879075960712e-05, "loss": 1.4692, "step": 3726 }, { "epoch": 0.5338776679558802, "grad_norm": 1.521201491355896, "learning_rate": 9.390247867338464e-05, "loss": 1.3366, "step": 3727 }, { "epoch": 0.534020913909182, "grad_norm": 1.0459558963775635, "learning_rate": 9.385616789980759e-05, "loss": 1.3019, "step": 3728 }, { "epoch": 0.5341641598624839, "grad_norm": 1.0965683460235596, "learning_rate": 9.380985844884561e-05, "loss": 1.2818, "step": 3729 }, { "epoch": 0.5343074058157857, "grad_norm": 0.9563542604446411, "learning_rate": 9.376355033046789e-05, "loss": 1.516, "step": 3730 }, { "epoch": 0.5344506517690876, "grad_norm": 1.326822280883789, "learning_rate": 9.371724355464346e-05, "loss": 1.5117, "step": 3731 }, { "epoch": 0.5345938977223893, "grad_norm": 0.9649562239646912, "learning_rate": 9.367093813134103e-05, "loss": 1.5129, "step": 3732 }, { "epoch": 0.5347371436756911, "grad_norm": 0.9666440486907959, "learning_rate": 9.362463407052898e-05, "loss": 1.5485, "step": 3733 }, { "epoch": 0.534880389628993, "grad_norm": 1.1279494762420654, "learning_rate": 9.357833138217549e-05, "loss": 1.4396, "step": 3734 }, { "epoch": 0.5350236355822948, "grad_norm": 1.0130126476287842, "learning_rate": 9.35320300762483e-05, "loss": 1.3434, "step": 3735 }, { "epoch": 0.5351668815355967, "grad_norm": 1.3631035089492798, "learning_rate": 9.348573016271498e-05, "loss": 1.6268, "step": 3736 }, { "epoch": 0.5353101274888984, "grad_norm": 1.3924418687820435, "learning_rate": 9.343943165154278e-05, "loss": 1.6327, "step": 3737 }, { "epoch": 0.5354533734422002, "grad_norm": 1.0600417852401733, "learning_rate": 9.339313455269856e-05, "loss": 1.4602, "step": 3738 }, { "epoch": 0.5355966193955021, "grad_norm": 0.9704450368881226, "learning_rate": 9.334683887614902e-05, "loss": 1.4972, "step": 3739 }, { "epoch": 0.5357398653488039, "grad_norm": 1.1102216243743896, "learning_rate": 9.330054463186038e-05, "loss": 1.4775, "step": 3740 }, { "epoch": 0.5358831113021058, "grad_norm": 1.1245002746582031, "learning_rate": 9.325425182979873e-05, "loss": 1.4571, "step": 3741 }, { "epoch": 0.5360263572554075, "grad_norm": 1.1927329301834106, "learning_rate": 9.320796047992973e-05, "loss": 1.2818, "step": 3742 }, { "epoch": 0.5361696032087093, "grad_norm": 0.9984963536262512, "learning_rate": 9.316167059221874e-05, "loss": 1.5256, "step": 3743 }, { "epoch": 0.5363128491620112, "grad_norm": 1.1356059312820435, "learning_rate": 9.311538217663084e-05, "loss": 1.4571, "step": 3744 }, { "epoch": 0.536456095115313, "grad_norm": 1.1551594734191895, "learning_rate": 9.306909524313083e-05, "loss": 1.4098, "step": 3745 }, { "epoch": 0.5365993410686148, "grad_norm": 1.1143728494644165, "learning_rate": 9.302280980168307e-05, "loss": 1.3824, "step": 3746 }, { "epoch": 0.5367425870219167, "grad_norm": 0.9751464128494263, "learning_rate": 9.297652586225175e-05, "loss": 1.505, "step": 3747 }, { "epoch": 0.5368858329752184, "grad_norm": 1.0940778255462646, "learning_rate": 9.293024343480055e-05, "loss": 1.3833, "step": 3748 }, { "epoch": 0.5370290789285203, "grad_norm": 1.0687774419784546, "learning_rate": 9.288396252929299e-05, "loss": 1.3458, "step": 3749 }, { "epoch": 0.5371723248818221, "grad_norm": 1.0949299335479736, "learning_rate": 9.283768315569226e-05, "loss": 1.5347, "step": 3750 }, { "epoch": 0.5373155708351239, "grad_norm": 1.0210727453231812, "learning_rate": 9.279140532396104e-05, "loss": 1.4049, "step": 3751 }, { "epoch": 0.5374588167884258, "grad_norm": 1.2851144075393677, "learning_rate": 9.27451290440619e-05, "loss": 1.4596, "step": 3752 }, { "epoch": 0.5376020627417275, "grad_norm": 1.1687676906585693, "learning_rate": 9.269885432595696e-05, "loss": 1.6354, "step": 3753 }, { "epoch": 0.5377453086950293, "grad_norm": 1.020384669303894, "learning_rate": 9.265258117960797e-05, "loss": 1.5364, "step": 3754 }, { "epoch": 0.5378885546483312, "grad_norm": 1.1265329122543335, "learning_rate": 9.260630961497648e-05, "loss": 1.384, "step": 3755 }, { "epoch": 0.538031800601633, "grad_norm": 0.8979662656784058, "learning_rate": 9.256003964202352e-05, "loss": 1.5755, "step": 3756 }, { "epoch": 0.5381750465549349, "grad_norm": 1.2778605222702026, "learning_rate": 9.251377127070992e-05, "loss": 1.3791, "step": 3757 }, { "epoch": 0.5383182925082366, "grad_norm": 0.974638819694519, "learning_rate": 9.246750451099615e-05, "loss": 1.3681, "step": 3758 }, { "epoch": 0.5384615384615384, "grad_norm": 1.0232633352279663, "learning_rate": 9.242123937284223e-05, "loss": 1.5524, "step": 3759 }, { "epoch": 0.5386047844148403, "grad_norm": 1.1448662281036377, "learning_rate": 9.23749758662079e-05, "loss": 1.402, "step": 3760 }, { "epoch": 0.5387480303681421, "grad_norm": 1.3974542617797852, "learning_rate": 9.232871400105265e-05, "loss": 1.3393, "step": 3761 }, { "epoch": 0.538891276321444, "grad_norm": 1.007043480873108, "learning_rate": 9.228245378733537e-05, "loss": 1.2798, "step": 3762 }, { "epoch": 0.5390345222747457, "grad_norm": 1.0703458786010742, "learning_rate": 9.223619523501484e-05, "loss": 1.4011, "step": 3763 }, { "epoch": 0.5391777682280475, "grad_norm": 1.2995879650115967, "learning_rate": 9.218993835404935e-05, "loss": 1.4441, "step": 3764 }, { "epoch": 0.5393210141813494, "grad_norm": 1.1272830963134766, "learning_rate": 9.214368315439685e-05, "loss": 1.4058, "step": 3765 }, { "epoch": 0.5394642601346512, "grad_norm": 1.0471073389053345, "learning_rate": 9.209742964601498e-05, "loss": 1.4818, "step": 3766 }, { "epoch": 0.539607506087953, "grad_norm": 1.1284985542297363, "learning_rate": 9.205117783886091e-05, "loss": 1.4006, "step": 3767 }, { "epoch": 0.5397507520412549, "grad_norm": 1.1525839567184448, "learning_rate": 9.20049277428916e-05, "loss": 1.4398, "step": 3768 }, { "epoch": 0.5398939979945566, "grad_norm": 0.9529622197151184, "learning_rate": 9.195867936806346e-05, "loss": 1.3445, "step": 3769 }, { "epoch": 0.5400372439478585, "grad_norm": 1.156707525253296, "learning_rate": 9.191243272433268e-05, "loss": 1.3769, "step": 3770 }, { "epoch": 0.5401804899011603, "grad_norm": 1.3044835329055786, "learning_rate": 9.1866187821655e-05, "loss": 1.5264, "step": 3771 }, { "epoch": 0.5403237358544621, "grad_norm": 1.090537190437317, "learning_rate": 9.181994466998577e-05, "loss": 1.2196, "step": 3772 }, { "epoch": 0.540466981807764, "grad_norm": 1.139548659324646, "learning_rate": 9.177370327928004e-05, "loss": 1.4651, "step": 3773 }, { "epoch": 0.5406102277610657, "grad_norm": 1.1102821826934814, "learning_rate": 9.172746365949246e-05, "loss": 1.3472, "step": 3774 }, { "epoch": 0.5407534737143675, "grad_norm": 1.2802883386611938, "learning_rate": 9.16812258205772e-05, "loss": 1.4484, "step": 3775 }, { "epoch": 0.5408967196676694, "grad_norm": 0.9637173414230347, "learning_rate": 9.16349897724882e-05, "loss": 1.2357, "step": 3776 }, { "epoch": 0.5410399656209712, "grad_norm": 0.9517381191253662, "learning_rate": 9.158875552517887e-05, "loss": 1.3801, "step": 3777 }, { "epoch": 0.5411832115742731, "grad_norm": 1.0572885274887085, "learning_rate": 9.154252308860231e-05, "loss": 1.2741, "step": 3778 }, { "epoch": 0.5413264575275748, "grad_norm": 1.0451692342758179, "learning_rate": 9.149629247271128e-05, "loss": 1.5178, "step": 3779 }, { "epoch": 0.5414697034808766, "grad_norm": 1.0329433679580688, "learning_rate": 9.145006368745803e-05, "loss": 1.2116, "step": 3780 }, { "epoch": 0.5416129494341785, "grad_norm": 1.1981528997421265, "learning_rate": 9.14038367427945e-05, "loss": 1.4681, "step": 3781 }, { "epoch": 0.5417561953874803, "grad_norm": 1.1192377805709839, "learning_rate": 9.135761164867221e-05, "loss": 1.5535, "step": 3782 }, { "epoch": 0.5418994413407822, "grad_norm": 1.038725733757019, "learning_rate": 9.131138841504222e-05, "loss": 1.3324, "step": 3783 }, { "epoch": 0.5420426872940839, "grad_norm": 1.2320663928985596, "learning_rate": 9.126516705185535e-05, "loss": 1.4358, "step": 3784 }, { "epoch": 0.5421859332473857, "grad_norm": 1.1724793910980225, "learning_rate": 9.121894756906181e-05, "loss": 1.5002, "step": 3785 }, { "epoch": 0.5423291792006876, "grad_norm": 0.9963299036026001, "learning_rate": 9.117272997661157e-05, "loss": 1.4899, "step": 3786 }, { "epoch": 0.5424724251539894, "grad_norm": 1.2009650468826294, "learning_rate": 9.112651428445416e-05, "loss": 1.5703, "step": 3787 }, { "epoch": 0.5426156711072913, "grad_norm": 1.2211109399795532, "learning_rate": 9.108030050253861e-05, "loss": 1.4439, "step": 3788 }, { "epoch": 0.5427589170605931, "grad_norm": 1.1849770545959473, "learning_rate": 9.103408864081365e-05, "loss": 1.3528, "step": 3789 }, { "epoch": 0.5429021630138948, "grad_norm": 1.1851774454116821, "learning_rate": 9.098787870922751e-05, "loss": 1.5033, "step": 3790 }, { "epoch": 0.5430454089671967, "grad_norm": 1.2212743759155273, "learning_rate": 9.094167071772808e-05, "loss": 1.4093, "step": 3791 }, { "epoch": 0.5431886549204985, "grad_norm": 1.0389831066131592, "learning_rate": 9.08954646762628e-05, "loss": 1.4082, "step": 3792 }, { "epoch": 0.5433319008738003, "grad_norm": 0.9905836582183838, "learning_rate": 9.084926059477865e-05, "loss": 1.3448, "step": 3793 }, { "epoch": 0.5434751468271022, "grad_norm": 1.2270208597183228, "learning_rate": 9.080305848322223e-05, "loss": 1.4081, "step": 3794 }, { "epoch": 0.5436183927804039, "grad_norm": 0.9713574051856995, "learning_rate": 9.075685835153981e-05, "loss": 1.4037, "step": 3795 }, { "epoch": 0.5437616387337058, "grad_norm": 0.9763878583908081, "learning_rate": 9.071066020967698e-05, "loss": 1.4406, "step": 3796 }, { "epoch": 0.5439048846870076, "grad_norm": 1.0408380031585693, "learning_rate": 9.066446406757916e-05, "loss": 1.503, "step": 3797 }, { "epoch": 0.5440481306403094, "grad_norm": 1.399655818939209, "learning_rate": 9.06182699351912e-05, "loss": 1.2813, "step": 3798 }, { "epoch": 0.5441913765936113, "grad_norm": 1.4461750984191895, "learning_rate": 9.057207782245757e-05, "loss": 1.4684, "step": 3799 }, { "epoch": 0.544334622546913, "grad_norm": 1.0886634588241577, "learning_rate": 9.052588773932228e-05, "loss": 1.2758, "step": 3800 }, { "epoch": 0.5444778685002148, "grad_norm": 1.2171721458435059, "learning_rate": 9.047969969572889e-05, "loss": 1.5761, "step": 3801 }, { "epoch": 0.5446211144535167, "grad_norm": 1.094599723815918, "learning_rate": 9.043351370162058e-05, "loss": 1.3756, "step": 3802 }, { "epoch": 0.5447643604068185, "grad_norm": 0.988581120967865, "learning_rate": 9.038732976694005e-05, "loss": 1.3801, "step": 3803 }, { "epoch": 0.5449076063601204, "grad_norm": 1.0670268535614014, "learning_rate": 9.034114790162952e-05, "loss": 1.5492, "step": 3804 }, { "epoch": 0.5450508523134221, "grad_norm": 1.0462517738342285, "learning_rate": 9.029496811563086e-05, "loss": 1.4568, "step": 3805 }, { "epoch": 0.5451940982667239, "grad_norm": 1.123539924621582, "learning_rate": 9.024879041888535e-05, "loss": 1.6635, "step": 3806 }, { "epoch": 0.5453373442200258, "grad_norm": 0.9439468383789062, "learning_rate": 9.020261482133398e-05, "loss": 1.3692, "step": 3807 }, { "epoch": 0.5454805901733276, "grad_norm": 1.149488925933838, "learning_rate": 9.015644133291723e-05, "loss": 1.316, "step": 3808 }, { "epoch": 0.5456238361266295, "grad_norm": 1.1205295324325562, "learning_rate": 9.011026996357503e-05, "loss": 1.5297, "step": 3809 }, { "epoch": 0.5457670820799312, "grad_norm": 1.0543869733810425, "learning_rate": 9.006410072324696e-05, "loss": 1.4257, "step": 3810 }, { "epoch": 0.545910328033233, "grad_norm": 1.0220791101455688, "learning_rate": 9.00179336218722e-05, "loss": 1.5224, "step": 3811 }, { "epoch": 0.5460535739865349, "grad_norm": 1.0441293716430664, "learning_rate": 8.997176866938923e-05, "loss": 1.6382, "step": 3812 }, { "epoch": 0.5461968199398367, "grad_norm": 1.1052736043930054, "learning_rate": 8.992560587573636e-05, "loss": 1.4608, "step": 3813 }, { "epoch": 0.5463400658931385, "grad_norm": 1.0017259120941162, "learning_rate": 8.987944525085123e-05, "loss": 1.5593, "step": 3814 }, { "epoch": 0.5464833118464404, "grad_norm": 1.206753134727478, "learning_rate": 8.983328680467108e-05, "loss": 1.4171, "step": 3815 }, { "epoch": 0.5466265577997421, "grad_norm": 1.1038222312927246, "learning_rate": 8.978713054713271e-05, "loss": 1.5701, "step": 3816 }, { "epoch": 0.546769803753044, "grad_norm": 1.0201430320739746, "learning_rate": 8.974097648817238e-05, "loss": 1.449, "step": 3817 }, { "epoch": 0.5469130497063458, "grad_norm": 0.9676105976104736, "learning_rate": 8.9694824637726e-05, "loss": 1.4471, "step": 3818 }, { "epoch": 0.5470562956596476, "grad_norm": 1.0683642625808716, "learning_rate": 8.964867500572878e-05, "loss": 1.4505, "step": 3819 }, { "epoch": 0.5471995416129495, "grad_norm": 1.0126845836639404, "learning_rate": 8.960252760211571e-05, "loss": 1.2989, "step": 3820 }, { "epoch": 0.5473427875662512, "grad_norm": 1.1245943307876587, "learning_rate": 8.955638243682118e-05, "loss": 1.2839, "step": 3821 }, { "epoch": 0.547486033519553, "grad_norm": 1.096720814704895, "learning_rate": 8.951023951977904e-05, "loss": 1.4595, "step": 3822 }, { "epoch": 0.5476292794728549, "grad_norm": 1.269492268562317, "learning_rate": 8.946409886092273e-05, "loss": 1.2394, "step": 3823 }, { "epoch": 0.5477725254261567, "grad_norm": 1.0266296863555908, "learning_rate": 8.941796047018525e-05, "loss": 1.5995, "step": 3824 }, { "epoch": 0.5479157713794586, "grad_norm": 1.0450046062469482, "learning_rate": 8.9371824357499e-05, "loss": 1.3744, "step": 3825 }, { "epoch": 0.5480590173327603, "grad_norm": 1.100357174873352, "learning_rate": 8.932569053279599e-05, "loss": 1.2157, "step": 3826 }, { "epoch": 0.5482022632860621, "grad_norm": 1.2342329025268555, "learning_rate": 8.927955900600762e-05, "loss": 1.4972, "step": 3827 }, { "epoch": 0.548345509239364, "grad_norm": 1.069729208946228, "learning_rate": 8.92334297870649e-05, "loss": 1.6715, "step": 3828 }, { "epoch": 0.5484887551926658, "grad_norm": 1.1644538640975952, "learning_rate": 8.918730288589835e-05, "loss": 1.3111, "step": 3829 }, { "epoch": 0.5486320011459677, "grad_norm": 0.9721007943153381, "learning_rate": 8.914117831243789e-05, "loss": 1.381, "step": 3830 }, { "epoch": 0.5487752470992694, "grad_norm": 1.3485933542251587, "learning_rate": 8.909505607661304e-05, "loss": 1.3944, "step": 3831 }, { "epoch": 0.5489184930525712, "grad_norm": 0.9442260265350342, "learning_rate": 8.904893618835277e-05, "loss": 1.5614, "step": 3832 }, { "epoch": 0.5490617390058731, "grad_norm": 0.9943949580192566, "learning_rate": 8.900281865758551e-05, "loss": 1.4188, "step": 3833 }, { "epoch": 0.5492049849591749, "grad_norm": 1.1620928049087524, "learning_rate": 8.89567034942393e-05, "loss": 1.4703, "step": 3834 }, { "epoch": 0.5493482309124768, "grad_norm": 1.2041715383529663, "learning_rate": 8.89105907082415e-05, "loss": 1.3214, "step": 3835 }, { "epoch": 0.5494914768657786, "grad_norm": 1.1206190586090088, "learning_rate": 8.886448030951912e-05, "loss": 1.4794, "step": 3836 }, { "epoch": 0.5496347228190803, "grad_norm": 1.107960820198059, "learning_rate": 8.881837230799859e-05, "loss": 1.4781, "step": 3837 }, { "epoch": 0.5497779687723822, "grad_norm": 1.121025800704956, "learning_rate": 8.877226671360577e-05, "loss": 1.5051, "step": 3838 }, { "epoch": 0.549921214725684, "grad_norm": 1.0006877183914185, "learning_rate": 8.872616353626608e-05, "loss": 1.4129, "step": 3839 }, { "epoch": 0.5500644606789858, "grad_norm": 1.0987566709518433, "learning_rate": 8.868006278590442e-05, "loss": 1.428, "step": 3840 }, { "epoch": 0.5502077066322877, "grad_norm": 1.187184453010559, "learning_rate": 8.863396447244511e-05, "loss": 1.4837, "step": 3841 }, { "epoch": 0.5503509525855894, "grad_norm": 0.9451871514320374, "learning_rate": 8.8587868605812e-05, "loss": 1.4214, "step": 3842 }, { "epoch": 0.5504941985388913, "grad_norm": 1.081092119216919, "learning_rate": 8.854177519592834e-05, "loss": 1.5579, "step": 3843 }, { "epoch": 0.5506374444921931, "grad_norm": 1.0740270614624023, "learning_rate": 8.849568425271693e-05, "loss": 1.4188, "step": 3844 }, { "epoch": 0.5507806904454949, "grad_norm": 1.0851736068725586, "learning_rate": 8.844959578610005e-05, "loss": 1.3686, "step": 3845 }, { "epoch": 0.5509239363987968, "grad_norm": 1.0271912813186646, "learning_rate": 8.840350980599934e-05, "loss": 1.4093, "step": 3846 }, { "epoch": 0.5510671823520985, "grad_norm": 1.0875070095062256, "learning_rate": 8.835742632233603e-05, "loss": 1.6076, "step": 3847 }, { "epoch": 0.5512104283054003, "grad_norm": 0.9868462085723877, "learning_rate": 8.831134534503069e-05, "loss": 1.4214, "step": 3848 }, { "epoch": 0.5513536742587022, "grad_norm": 1.0932023525238037, "learning_rate": 8.826526688400346e-05, "loss": 1.516, "step": 3849 }, { "epoch": 0.551496920212004, "grad_norm": 1.0740574598312378, "learning_rate": 8.82191909491739e-05, "loss": 1.4841, "step": 3850 }, { "epoch": 0.5516401661653059, "grad_norm": 1.0741970539093018, "learning_rate": 8.817311755046095e-05, "loss": 1.3124, "step": 3851 }, { "epoch": 0.5517834121186076, "grad_norm": 1.0681806802749634, "learning_rate": 8.812704669778315e-05, "loss": 1.3405, "step": 3852 }, { "epoch": 0.5519266580719094, "grad_norm": 1.030573844909668, "learning_rate": 8.808097840105841e-05, "loss": 1.4681, "step": 3853 }, { "epoch": 0.5520699040252113, "grad_norm": 1.0412677526474, "learning_rate": 8.803491267020404e-05, "loss": 1.4991, "step": 3854 }, { "epoch": 0.5522131499785131, "grad_norm": 1.028397798538208, "learning_rate": 8.79888495151369e-05, "loss": 1.3751, "step": 3855 }, { "epoch": 0.552356395931815, "grad_norm": 1.0425646305084229, "learning_rate": 8.794278894577322e-05, "loss": 1.4308, "step": 3856 }, { "epoch": 0.5524996418851168, "grad_norm": 1.0469813346862793, "learning_rate": 8.789673097202869e-05, "loss": 1.4158, "step": 3857 }, { "epoch": 0.5526428878384185, "grad_norm": 1.0224820375442505, "learning_rate": 8.785067560381852e-05, "loss": 1.4871, "step": 3858 }, { "epoch": 0.5527861337917204, "grad_norm": 1.2438688278198242, "learning_rate": 8.78046228510572e-05, "loss": 1.5247, "step": 3859 }, { "epoch": 0.5529293797450222, "grad_norm": 1.1570391654968262, "learning_rate": 8.77585727236588e-05, "loss": 1.4033, "step": 3860 }, { "epoch": 0.553072625698324, "grad_norm": 1.131225347518921, "learning_rate": 8.771252523153679e-05, "loss": 1.4037, "step": 3861 }, { "epoch": 0.5532158716516259, "grad_norm": 1.3698474168777466, "learning_rate": 8.766648038460399e-05, "loss": 1.529, "step": 3862 }, { "epoch": 0.5533591176049276, "grad_norm": 1.0556474924087524, "learning_rate": 8.762043819277279e-05, "loss": 1.5553, "step": 3863 }, { "epoch": 0.5535023635582295, "grad_norm": 1.063498854637146, "learning_rate": 8.757439866595489e-05, "loss": 1.4272, "step": 3864 }, { "epoch": 0.5536456095115313, "grad_norm": 1.147944450378418, "learning_rate": 8.752836181406147e-05, "loss": 1.4297, "step": 3865 }, { "epoch": 0.5537888554648331, "grad_norm": 1.038952350616455, "learning_rate": 8.748232764700314e-05, "loss": 1.4522, "step": 3866 }, { "epoch": 0.553932101418135, "grad_norm": 1.064469814300537, "learning_rate": 8.743629617468986e-05, "loss": 1.4506, "step": 3867 }, { "epoch": 0.5540753473714367, "grad_norm": 1.461004376411438, "learning_rate": 8.739026740703119e-05, "loss": 1.3769, "step": 3868 }, { "epoch": 0.5542185933247386, "grad_norm": 1.429337739944458, "learning_rate": 8.734424135393582e-05, "loss": 1.4826, "step": 3869 }, { "epoch": 0.5543618392780404, "grad_norm": 1.0875413417816162, "learning_rate": 8.729821802531212e-05, "loss": 1.4437, "step": 3870 }, { "epoch": 0.5545050852313422, "grad_norm": 1.3868958950042725, "learning_rate": 8.72521974310678e-05, "loss": 1.4189, "step": 3871 }, { "epoch": 0.5546483311846441, "grad_norm": 1.039495587348938, "learning_rate": 8.720617958110989e-05, "loss": 1.3494, "step": 3872 }, { "epoch": 0.5547915771379458, "grad_norm": 1.0088516473770142, "learning_rate": 8.71601644853449e-05, "loss": 1.3424, "step": 3873 }, { "epoch": 0.5549348230912476, "grad_norm": 1.179149866104126, "learning_rate": 8.71141521536788e-05, "loss": 1.4375, "step": 3874 }, { "epoch": 0.5550780690445495, "grad_norm": 0.9701326489448547, "learning_rate": 8.706814259601684e-05, "loss": 1.3753, "step": 3875 }, { "epoch": 0.5552213149978513, "grad_norm": 1.1500099897384644, "learning_rate": 8.702213582226379e-05, "loss": 1.5272, "step": 3876 }, { "epoch": 0.5553645609511532, "grad_norm": 0.8950748443603516, "learning_rate": 8.697613184232372e-05, "loss": 1.4617, "step": 3877 }, { "epoch": 0.555507806904455, "grad_norm": 1.3198305368423462, "learning_rate": 8.693013066610016e-05, "loss": 1.4492, "step": 3878 }, { "epoch": 0.5556510528577567, "grad_norm": 0.9568539261817932, "learning_rate": 8.68841323034961e-05, "loss": 1.398, "step": 3879 }, { "epoch": 0.5557942988110586, "grad_norm": 1.0182719230651855, "learning_rate": 8.68381367644137e-05, "loss": 1.3819, "step": 3880 }, { "epoch": 0.5559375447643604, "grad_norm": 1.1024657487869263, "learning_rate": 8.67921440587548e-05, "loss": 1.4614, "step": 3881 }, { "epoch": 0.5560807907176623, "grad_norm": 1.0097256898880005, "learning_rate": 8.674615419642044e-05, "loss": 1.5333, "step": 3882 }, { "epoch": 0.5562240366709641, "grad_norm": 1.2199742794036865, "learning_rate": 8.670016718731111e-05, "loss": 1.3474, "step": 3883 }, { "epoch": 0.5563672826242658, "grad_norm": 1.0519652366638184, "learning_rate": 8.665418304132666e-05, "loss": 1.5538, "step": 3884 }, { "epoch": 0.5565105285775677, "grad_norm": 1.143084168434143, "learning_rate": 8.66082017683663e-05, "loss": 1.2621, "step": 3885 }, { "epoch": 0.5566537745308695, "grad_norm": 1.058337688446045, "learning_rate": 8.656222337832875e-05, "loss": 1.4939, "step": 3886 }, { "epoch": 0.5567970204841713, "grad_norm": 1.0118240118026733, "learning_rate": 8.651624788111197e-05, "loss": 1.394, "step": 3887 }, { "epoch": 0.5569402664374732, "grad_norm": 1.1734325885772705, "learning_rate": 8.647027528661333e-05, "loss": 1.4291, "step": 3888 }, { "epoch": 0.5570835123907749, "grad_norm": 1.0719181299209595, "learning_rate": 8.64243056047296e-05, "loss": 1.4236, "step": 3889 }, { "epoch": 0.5572267583440768, "grad_norm": 1.1030477285385132, "learning_rate": 8.6378338845357e-05, "loss": 1.4701, "step": 3890 }, { "epoch": 0.5573700042973786, "grad_norm": 1.020727515220642, "learning_rate": 8.633237501839089e-05, "loss": 1.418, "step": 3891 }, { "epoch": 0.5575132502506804, "grad_norm": 0.9887786507606506, "learning_rate": 8.628641413372625e-05, "loss": 1.4229, "step": 3892 }, { "epoch": 0.5576564962039823, "grad_norm": 1.0122346878051758, "learning_rate": 8.624045620125727e-05, "loss": 1.463, "step": 3893 }, { "epoch": 0.557799742157284, "grad_norm": 1.035226821899414, "learning_rate": 8.619450123087757e-05, "loss": 1.458, "step": 3894 }, { "epoch": 0.5579429881105858, "grad_norm": 1.1406611204147339, "learning_rate": 8.614854923248016e-05, "loss": 1.3109, "step": 3895 }, { "epoch": 0.5580862340638877, "grad_norm": 1.2442231178283691, "learning_rate": 8.610260021595728e-05, "loss": 1.5573, "step": 3896 }, { "epoch": 0.5582294800171895, "grad_norm": 1.1228947639465332, "learning_rate": 8.605665419120071e-05, "loss": 1.623, "step": 3897 }, { "epoch": 0.5583727259704914, "grad_norm": 0.9782788753509521, "learning_rate": 8.601071116810143e-05, "loss": 1.2369, "step": 3898 }, { "epoch": 0.5585159719237931, "grad_norm": 1.1443662643432617, "learning_rate": 8.596477115654985e-05, "loss": 1.4317, "step": 3899 }, { "epoch": 0.5586592178770949, "grad_norm": 1.0493360757827759, "learning_rate": 8.591883416643574e-05, "loss": 1.4359, "step": 3900 }, { "epoch": 0.5588024638303968, "grad_norm": 0.9904102087020874, "learning_rate": 8.587290020764814e-05, "loss": 1.467, "step": 3901 }, { "epoch": 0.5589457097836986, "grad_norm": 1.177286148071289, "learning_rate": 8.582696929007556e-05, "loss": 1.6094, "step": 3902 }, { "epoch": 0.5590889557370005, "grad_norm": 1.3895848989486694, "learning_rate": 8.578104142360578e-05, "loss": 1.2424, "step": 3903 }, { "epoch": 0.5592322016903023, "grad_norm": 1.1811342239379883, "learning_rate": 8.573511661812589e-05, "loss": 1.4958, "step": 3904 }, { "epoch": 0.559375447643604, "grad_norm": 1.180285930633545, "learning_rate": 8.568919488352242e-05, "loss": 1.3743, "step": 3905 }, { "epoch": 0.5595186935969059, "grad_norm": 1.1766390800476074, "learning_rate": 8.564327622968113e-05, "loss": 1.5588, "step": 3906 }, { "epoch": 0.5596619395502077, "grad_norm": 1.0139480829238892, "learning_rate": 8.559736066648717e-05, "loss": 1.4818, "step": 3907 }, { "epoch": 0.5598051855035096, "grad_norm": 1.019096851348877, "learning_rate": 8.55514482038251e-05, "loss": 1.4169, "step": 3908 }, { "epoch": 0.5599484314568114, "grad_norm": 1.0620943307876587, "learning_rate": 8.550553885157866e-05, "loss": 1.3312, "step": 3909 }, { "epoch": 0.5600916774101131, "grad_norm": 1.053501009941101, "learning_rate": 8.545963261963102e-05, "loss": 1.5649, "step": 3910 }, { "epoch": 0.560234923363415, "grad_norm": 0.9656388759613037, "learning_rate": 8.541372951786469e-05, "loss": 1.4338, "step": 3911 }, { "epoch": 0.5603781693167168, "grad_norm": 0.9396020770072937, "learning_rate": 8.536782955616138e-05, "loss": 1.3519, "step": 3912 }, { "epoch": 0.5605214152700186, "grad_norm": 0.9551534056663513, "learning_rate": 8.532193274440235e-05, "loss": 1.52, "step": 3913 }, { "epoch": 0.5606646612233205, "grad_norm": 1.2763575315475464, "learning_rate": 8.527603909246791e-05, "loss": 1.2783, "step": 3914 }, { "epoch": 0.5608079071766222, "grad_norm": 1.0461019277572632, "learning_rate": 8.523014861023791e-05, "loss": 1.3851, "step": 3915 }, { "epoch": 0.560951153129924, "grad_norm": 1.0487090349197388, "learning_rate": 8.518426130759145e-05, "loss": 1.3792, "step": 3916 }, { "epoch": 0.5610943990832259, "grad_norm": 1.0350587368011475, "learning_rate": 8.513837719440688e-05, "loss": 1.3428, "step": 3917 }, { "epoch": 0.5612376450365277, "grad_norm": 0.9404356479644775, "learning_rate": 8.509249628056192e-05, "loss": 1.3942, "step": 3918 }, { "epoch": 0.5613808909898296, "grad_norm": 1.0931662321090698, "learning_rate": 8.504661857593358e-05, "loss": 1.4072, "step": 3919 }, { "epoch": 0.5615241369431313, "grad_norm": 1.213301181793213, "learning_rate": 8.500074409039827e-05, "loss": 1.4583, "step": 3920 }, { "epoch": 0.5616673828964331, "grad_norm": 0.9574490785598755, "learning_rate": 8.495487283383158e-05, "loss": 1.4549, "step": 3921 }, { "epoch": 0.561810628849735, "grad_norm": 1.125637412071228, "learning_rate": 8.490900481610845e-05, "loss": 1.3733, "step": 3922 }, { "epoch": 0.5619538748030368, "grad_norm": 1.0171054601669312, "learning_rate": 8.486314004710311e-05, "loss": 1.3631, "step": 3923 }, { "epoch": 0.5620971207563387, "grad_norm": 1.153102159500122, "learning_rate": 8.48172785366892e-05, "loss": 1.4226, "step": 3924 }, { "epoch": 0.5622403667096405, "grad_norm": 1.125910758972168, "learning_rate": 8.477142029473945e-05, "loss": 1.3307, "step": 3925 }, { "epoch": 0.5623836126629422, "grad_norm": 1.0122894048690796, "learning_rate": 8.47255653311261e-05, "loss": 1.5344, "step": 3926 }, { "epoch": 0.5625268586162441, "grad_norm": 1.0655895471572876, "learning_rate": 8.467971365572053e-05, "loss": 1.4975, "step": 3927 }, { "epoch": 0.5626701045695459, "grad_norm": 1.19464111328125, "learning_rate": 8.463386527839349e-05, "loss": 1.3688, "step": 3928 }, { "epoch": 0.5628133505228478, "grad_norm": 1.0846012830734253, "learning_rate": 8.458802020901503e-05, "loss": 1.34, "step": 3929 }, { "epoch": 0.5629565964761496, "grad_norm": 1.254239797592163, "learning_rate": 8.454217845745439e-05, "loss": 1.5473, "step": 3930 }, { "epoch": 0.5630998424294513, "grad_norm": 1.2421461343765259, "learning_rate": 8.449634003358022e-05, "loss": 1.3548, "step": 3931 }, { "epoch": 0.5632430883827532, "grad_norm": 1.0915660858154297, "learning_rate": 8.44505049472604e-05, "loss": 1.4373, "step": 3932 }, { "epoch": 0.563386334336055, "grad_norm": 1.2855892181396484, "learning_rate": 8.440467320836208e-05, "loss": 1.4674, "step": 3933 }, { "epoch": 0.5635295802893568, "grad_norm": 0.9798267483711243, "learning_rate": 8.435884482675168e-05, "loss": 1.382, "step": 3934 }, { "epoch": 0.5636728262426587, "grad_norm": 1.1581510305404663, "learning_rate": 8.431301981229492e-05, "loss": 1.3357, "step": 3935 }, { "epoch": 0.5638160721959604, "grad_norm": 1.156049132347107, "learning_rate": 8.42671981748568e-05, "loss": 1.3556, "step": 3936 }, { "epoch": 0.5639593181492623, "grad_norm": 0.9260801076889038, "learning_rate": 8.422137992430162e-05, "loss": 1.3248, "step": 3937 }, { "epoch": 0.5641025641025641, "grad_norm": 1.0331379175186157, "learning_rate": 8.417556507049285e-05, "loss": 1.3735, "step": 3938 }, { "epoch": 0.5642458100558659, "grad_norm": 1.1090047359466553, "learning_rate": 8.412975362329332e-05, "loss": 1.3409, "step": 3939 }, { "epoch": 0.5643890560091678, "grad_norm": 1.098628282546997, "learning_rate": 8.408394559256516e-05, "loss": 1.3075, "step": 3940 }, { "epoch": 0.5645323019624695, "grad_norm": 1.0496476888656616, "learning_rate": 8.40381409881696e-05, "loss": 1.4826, "step": 3941 }, { "epoch": 0.5646755479157713, "grad_norm": 0.9968166947364807, "learning_rate": 8.399233981996731e-05, "loss": 1.4571, "step": 3942 }, { "epoch": 0.5648187938690732, "grad_norm": 1.21078622341156, "learning_rate": 8.394654209781813e-05, "loss": 1.4344, "step": 3943 }, { "epoch": 0.564962039822375, "grad_norm": 1.0950098037719727, "learning_rate": 8.390074783158116e-05, "loss": 1.4021, "step": 3944 }, { "epoch": 0.5651052857756769, "grad_norm": 0.9033725261688232, "learning_rate": 8.385495703111483e-05, "loss": 1.268, "step": 3945 }, { "epoch": 0.5652485317289787, "grad_norm": 1.0935604572296143, "learning_rate": 8.380916970627666e-05, "loss": 1.4423, "step": 3946 }, { "epoch": 0.5653917776822804, "grad_norm": 0.9992495775222778, "learning_rate": 8.376338586692366e-05, "loss": 1.3864, "step": 3947 }, { "epoch": 0.5655350236355823, "grad_norm": 1.1999882459640503, "learning_rate": 8.371760552291183e-05, "loss": 1.4, "step": 3948 }, { "epoch": 0.5656782695888841, "grad_norm": 1.177969217300415, "learning_rate": 8.367182868409662e-05, "loss": 1.3745, "step": 3949 }, { "epoch": 0.565821515542186, "grad_norm": 1.396816611289978, "learning_rate": 8.362605536033265e-05, "loss": 1.4306, "step": 3950 }, { "epoch": 0.5659647614954878, "grad_norm": 1.2220689058303833, "learning_rate": 8.358028556147377e-05, "loss": 1.2499, "step": 3951 }, { "epoch": 0.5661080074487895, "grad_norm": 1.132494330406189, "learning_rate": 8.353451929737304e-05, "loss": 1.4422, "step": 3952 }, { "epoch": 0.5662512534020914, "grad_norm": 1.2047371864318848, "learning_rate": 8.348875657788291e-05, "loss": 1.3615, "step": 3953 }, { "epoch": 0.5663944993553932, "grad_norm": 1.0638824701309204, "learning_rate": 8.344299741285486e-05, "loss": 1.3023, "step": 3954 }, { "epoch": 0.566537745308695, "grad_norm": 1.0057721138000488, "learning_rate": 8.339724181213977e-05, "loss": 1.3085, "step": 3955 }, { "epoch": 0.5666809912619969, "grad_norm": 1.2296563386917114, "learning_rate": 8.335148978558764e-05, "loss": 1.4909, "step": 3956 }, { "epoch": 0.5668242372152986, "grad_norm": 1.0066629648208618, "learning_rate": 8.330574134304776e-05, "loss": 1.3211, "step": 3957 }, { "epoch": 0.5669674831686005, "grad_norm": 1.301001787185669, "learning_rate": 8.32599964943687e-05, "loss": 1.2835, "step": 3958 }, { "epoch": 0.5671107291219023, "grad_norm": 1.3211230039596558, "learning_rate": 8.32142552493981e-05, "loss": 1.4301, "step": 3959 }, { "epoch": 0.5672539750752041, "grad_norm": 1.1973659992218018, "learning_rate": 8.316851761798298e-05, "loss": 1.4096, "step": 3960 }, { "epoch": 0.567397221028506, "grad_norm": 1.138844609260559, "learning_rate": 8.312278360996952e-05, "loss": 1.3388, "step": 3961 }, { "epoch": 0.5675404669818077, "grad_norm": 1.3326082229614258, "learning_rate": 8.307705323520305e-05, "loss": 1.4922, "step": 3962 }, { "epoch": 0.5676837129351096, "grad_norm": 1.052769422531128, "learning_rate": 8.303132650352831e-05, "loss": 1.2773, "step": 3963 }, { "epoch": 0.5678269588884114, "grad_norm": 1.1518619060516357, "learning_rate": 8.298560342478901e-05, "loss": 1.5905, "step": 3964 }, { "epoch": 0.5679702048417132, "grad_norm": 1.367877721786499, "learning_rate": 8.293988400882826e-05, "loss": 1.4601, "step": 3965 }, { "epoch": 0.5681134507950151, "grad_norm": 1.0225945711135864, "learning_rate": 8.289416826548834e-05, "loss": 1.487, "step": 3966 }, { "epoch": 0.5682566967483169, "grad_norm": 1.0794260501861572, "learning_rate": 8.284845620461067e-05, "loss": 1.6319, "step": 3967 }, { "epoch": 0.5683999427016186, "grad_norm": 1.0988457202911377, "learning_rate": 8.28027478360359e-05, "loss": 1.3488, "step": 3968 }, { "epoch": 0.5685431886549205, "grad_norm": 1.0937910079956055, "learning_rate": 8.275704316960403e-05, "loss": 1.1671, "step": 3969 }, { "epoch": 0.5686864346082223, "grad_norm": 1.015166997909546, "learning_rate": 8.271134221515403e-05, "loss": 1.3646, "step": 3970 }, { "epoch": 0.5688296805615242, "grad_norm": 1.0831773281097412, "learning_rate": 8.266564498252425e-05, "loss": 1.6984, "step": 3971 }, { "epoch": 0.568972926514826, "grad_norm": 1.0808922052383423, "learning_rate": 8.261995148155213e-05, "loss": 1.5173, "step": 3972 }, { "epoch": 0.5691161724681277, "grad_norm": 1.053452730178833, "learning_rate": 8.257426172207434e-05, "loss": 1.3327, "step": 3973 }, { "epoch": 0.5692594184214296, "grad_norm": 1.1618472337722778, "learning_rate": 8.252857571392683e-05, "loss": 1.5115, "step": 3974 }, { "epoch": 0.5694026643747314, "grad_norm": 1.021773099899292, "learning_rate": 8.248289346694457e-05, "loss": 1.5194, "step": 3975 }, { "epoch": 0.5695459103280333, "grad_norm": 1.0760945081710815, "learning_rate": 8.24372149909619e-05, "loss": 1.4145, "step": 3976 }, { "epoch": 0.5696891562813351, "grad_norm": 1.0001275539398193, "learning_rate": 8.239154029581222e-05, "loss": 1.383, "step": 3977 }, { "epoch": 0.5698324022346368, "grad_norm": 1.0532678365707397, "learning_rate": 8.234586939132817e-05, "loss": 1.3821, "step": 3978 }, { "epoch": 0.5699756481879387, "grad_norm": 0.9672591090202332, "learning_rate": 8.230020228734159e-05, "loss": 1.5348, "step": 3979 }, { "epoch": 0.5701188941412405, "grad_norm": 1.1478209495544434, "learning_rate": 8.22545389936834e-05, "loss": 1.4502, "step": 3980 }, { "epoch": 0.5702621400945423, "grad_norm": 1.231196641921997, "learning_rate": 8.220887952018387e-05, "loss": 1.421, "step": 3981 }, { "epoch": 0.5704053860478442, "grad_norm": 1.0397428274154663, "learning_rate": 8.216322387667232e-05, "loss": 1.3013, "step": 3982 }, { "epoch": 0.5705486320011459, "grad_norm": 1.0203744173049927, "learning_rate": 8.211757207297727e-05, "loss": 1.6783, "step": 3983 }, { "epoch": 0.5706918779544478, "grad_norm": 1.0634477138519287, "learning_rate": 8.207192411892646e-05, "loss": 1.3888, "step": 3984 }, { "epoch": 0.5708351239077496, "grad_norm": 0.9715363383293152, "learning_rate": 8.202628002434672e-05, "loss": 1.5194, "step": 3985 }, { "epoch": 0.5709783698610514, "grad_norm": 1.0375038385391235, "learning_rate": 8.198063979906407e-05, "loss": 1.4894, "step": 3986 }, { "epoch": 0.5711216158143533, "grad_norm": 1.3240010738372803, "learning_rate": 8.19350034529038e-05, "loss": 1.3202, "step": 3987 }, { "epoch": 0.5712648617676551, "grad_norm": 1.0191293954849243, "learning_rate": 8.188937099569024e-05, "loss": 1.354, "step": 3988 }, { "epoch": 0.5714081077209568, "grad_norm": 1.102787733078003, "learning_rate": 8.184374243724693e-05, "loss": 1.3427, "step": 3989 }, { "epoch": 0.5715513536742587, "grad_norm": 1.0335856676101685, "learning_rate": 8.17981177873966e-05, "loss": 1.4533, "step": 3990 }, { "epoch": 0.5716945996275605, "grad_norm": 1.0352792739868164, "learning_rate": 8.175249705596105e-05, "loss": 1.3224, "step": 3991 }, { "epoch": 0.5718378455808624, "grad_norm": 1.1521683931350708, "learning_rate": 8.170688025276134e-05, "loss": 1.3636, "step": 3992 }, { "epoch": 0.5719810915341642, "grad_norm": 1.0134963989257812, "learning_rate": 8.166126738761764e-05, "loss": 1.4362, "step": 3993 }, { "epoch": 0.5721243374874659, "grad_norm": 1.052530288696289, "learning_rate": 8.161565847034925e-05, "loss": 1.3769, "step": 3994 }, { "epoch": 0.5722675834407678, "grad_norm": 1.054436206817627, "learning_rate": 8.157005351077465e-05, "loss": 1.3641, "step": 3995 }, { "epoch": 0.5724108293940696, "grad_norm": 0.9436133503913879, "learning_rate": 8.152445251871144e-05, "loss": 1.4507, "step": 3996 }, { "epoch": 0.5725540753473715, "grad_norm": 1.3076878786087036, "learning_rate": 8.147885550397645e-05, "loss": 1.4484, "step": 3997 }, { "epoch": 0.5726973213006733, "grad_norm": 1.1398749351501465, "learning_rate": 8.143326247638549e-05, "loss": 1.4209, "step": 3998 }, { "epoch": 0.572840567253975, "grad_norm": 1.0094306468963623, "learning_rate": 8.138767344575368e-05, "loss": 1.5894, "step": 3999 }, { "epoch": 0.5729838132072769, "grad_norm": 1.1019415855407715, "learning_rate": 8.134208842189522e-05, "loss": 1.4945, "step": 4000 }, { "epoch": 0.5731270591605787, "grad_norm": 1.3345929384231567, "learning_rate": 8.12965074146234e-05, "loss": 1.4221, "step": 4001 }, { "epoch": 0.5732703051138806, "grad_norm": 1.0852625370025635, "learning_rate": 8.125093043375064e-05, "loss": 1.3058, "step": 4002 }, { "epoch": 0.5734135510671824, "grad_norm": 1.083067774772644, "learning_rate": 8.120535748908866e-05, "loss": 1.504, "step": 4003 }, { "epoch": 0.5735567970204841, "grad_norm": 1.0089505910873413, "learning_rate": 8.11597885904481e-05, "loss": 1.3618, "step": 4004 }, { "epoch": 0.573700042973786, "grad_norm": 1.242389440536499, "learning_rate": 8.111422374763883e-05, "loss": 1.5587, "step": 4005 }, { "epoch": 0.5738432889270878, "grad_norm": 1.1502662897109985, "learning_rate": 8.106866297046983e-05, "loss": 1.4469, "step": 4006 }, { "epoch": 0.5739865348803896, "grad_norm": 1.0065104961395264, "learning_rate": 8.10231062687492e-05, "loss": 1.4672, "step": 4007 }, { "epoch": 0.5741297808336915, "grad_norm": 1.0659387111663818, "learning_rate": 8.097755365228425e-05, "loss": 1.4156, "step": 4008 }, { "epoch": 0.5742730267869932, "grad_norm": 1.1952720880508423, "learning_rate": 8.093200513088118e-05, "loss": 1.492, "step": 4009 }, { "epoch": 0.574416272740295, "grad_norm": 1.0429340600967407, "learning_rate": 8.088646071434559e-05, "loss": 1.4798, "step": 4010 }, { "epoch": 0.5745595186935969, "grad_norm": 1.0296752452850342, "learning_rate": 8.084092041248203e-05, "loss": 1.2927, "step": 4011 }, { "epoch": 0.5747027646468987, "grad_norm": 0.9452112913131714, "learning_rate": 8.079538423509417e-05, "loss": 1.7685, "step": 4012 }, { "epoch": 0.5748460106002006, "grad_norm": 1.1228902339935303, "learning_rate": 8.074985219198488e-05, "loss": 1.4869, "step": 4013 }, { "epoch": 0.5749892565535024, "grad_norm": 1.2515939474105835, "learning_rate": 8.070432429295599e-05, "loss": 1.3411, "step": 4014 }, { "epoch": 0.5751325025068041, "grad_norm": 1.3844817876815796, "learning_rate": 8.065880054780861e-05, "loss": 1.4273, "step": 4015 }, { "epoch": 0.575275748460106, "grad_norm": 1.2724149227142334, "learning_rate": 8.061328096634288e-05, "loss": 1.4378, "step": 4016 }, { "epoch": 0.5754189944134078, "grad_norm": 1.1060429811477661, "learning_rate": 8.056776555835798e-05, "loss": 1.3972, "step": 4017 }, { "epoch": 0.5755622403667097, "grad_norm": 1.1961658000946045, "learning_rate": 8.052225433365226e-05, "loss": 1.43, "step": 4018 }, { "epoch": 0.5757054863200115, "grad_norm": 1.1288669109344482, "learning_rate": 8.047674730202325e-05, "loss": 1.514, "step": 4019 }, { "epoch": 0.5758487322733132, "grad_norm": 1.0612808465957642, "learning_rate": 8.043124447326736e-05, "loss": 1.4434, "step": 4020 }, { "epoch": 0.5759919782266151, "grad_norm": 1.1025935411453247, "learning_rate": 8.038574585718032e-05, "loss": 1.278, "step": 4021 }, { "epoch": 0.5761352241799169, "grad_norm": 1.148199439048767, "learning_rate": 8.03402514635568e-05, "loss": 1.5157, "step": 4022 }, { "epoch": 0.5762784701332188, "grad_norm": 1.1931822299957275, "learning_rate": 8.029476130219064e-05, "loss": 1.33, "step": 4023 }, { "epoch": 0.5764217160865206, "grad_norm": 1.1266332864761353, "learning_rate": 8.024927538287476e-05, "loss": 1.3397, "step": 4024 }, { "epoch": 0.5765649620398223, "grad_norm": 1.1183571815490723, "learning_rate": 8.02037937154011e-05, "loss": 1.4459, "step": 4025 }, { "epoch": 0.5767082079931242, "grad_norm": 1.1886775493621826, "learning_rate": 8.015831630956079e-05, "loss": 1.5075, "step": 4026 }, { "epoch": 0.576851453946426, "grad_norm": 1.073609709739685, "learning_rate": 8.011284317514398e-05, "loss": 1.3052, "step": 4027 }, { "epoch": 0.5769946998997278, "grad_norm": 1.1282099485397339, "learning_rate": 8.006737432193989e-05, "loss": 1.4253, "step": 4028 }, { "epoch": 0.5771379458530297, "grad_norm": 1.2322652339935303, "learning_rate": 8.002190975973689e-05, "loss": 1.3272, "step": 4029 }, { "epoch": 0.5772811918063314, "grad_norm": 1.1895850896835327, "learning_rate": 7.997644949832228e-05, "loss": 1.4017, "step": 4030 }, { "epoch": 0.5774244377596333, "grad_norm": 1.0355268716812134, "learning_rate": 7.993099354748261e-05, "loss": 1.382, "step": 4031 }, { "epoch": 0.5775676837129351, "grad_norm": 1.2945213317871094, "learning_rate": 7.988554191700343e-05, "loss": 1.5764, "step": 4032 }, { "epoch": 0.5777109296662369, "grad_norm": 0.952515184879303, "learning_rate": 7.98400946166693e-05, "loss": 1.4734, "step": 4033 }, { "epoch": 0.5778541756195388, "grad_norm": 1.1861485242843628, "learning_rate": 7.979465165626392e-05, "loss": 1.3564, "step": 4034 }, { "epoch": 0.5779974215728406, "grad_norm": 0.9773125648498535, "learning_rate": 7.974921304557002e-05, "loss": 1.3533, "step": 4035 }, { "epoch": 0.5781406675261423, "grad_norm": 1.1982392072677612, "learning_rate": 7.970377879436941e-05, "loss": 1.3583, "step": 4036 }, { "epoch": 0.5782839134794442, "grad_norm": 1.1087453365325928, "learning_rate": 7.965834891244301e-05, "loss": 1.4546, "step": 4037 }, { "epoch": 0.578427159432746, "grad_norm": 1.1653337478637695, "learning_rate": 7.961292340957069e-05, "loss": 1.3689, "step": 4038 }, { "epoch": 0.5785704053860479, "grad_norm": 0.925621509552002, "learning_rate": 7.956750229553145e-05, "loss": 1.5919, "step": 4039 }, { "epoch": 0.5787136513393497, "grad_norm": 1.099250316619873, "learning_rate": 7.952208558010336e-05, "loss": 1.5213, "step": 4040 }, { "epoch": 0.5788568972926514, "grad_norm": 0.9960086941719055, "learning_rate": 7.947667327306348e-05, "loss": 1.4788, "step": 4041 }, { "epoch": 0.5790001432459533, "grad_norm": 0.9202391505241394, "learning_rate": 7.943126538418802e-05, "loss": 1.3521, "step": 4042 }, { "epoch": 0.5791433891992551, "grad_norm": 1.127137541770935, "learning_rate": 7.938586192325205e-05, "loss": 1.5062, "step": 4043 }, { "epoch": 0.579286635152557, "grad_norm": 1.044081211090088, "learning_rate": 7.934046290002991e-05, "loss": 1.4451, "step": 4044 }, { "epoch": 0.5794298811058588, "grad_norm": 1.0595406293869019, "learning_rate": 7.929506832429489e-05, "loss": 1.5446, "step": 4045 }, { "epoch": 0.5795731270591605, "grad_norm": 1.1960043907165527, "learning_rate": 7.924967820581928e-05, "loss": 1.4994, "step": 4046 }, { "epoch": 0.5797163730124624, "grad_norm": 0.968834638595581, "learning_rate": 7.920429255437447e-05, "loss": 1.4074, "step": 4047 }, { "epoch": 0.5798596189657642, "grad_norm": 1.2260991334915161, "learning_rate": 7.915891137973082e-05, "loss": 1.3711, "step": 4048 }, { "epoch": 0.580002864919066, "grad_norm": 1.111136794090271, "learning_rate": 7.911353469165782e-05, "loss": 1.4278, "step": 4049 }, { "epoch": 0.5801461108723679, "grad_norm": 0.9621102213859558, "learning_rate": 7.9068162499924e-05, "loss": 1.5178, "step": 4050 }, { "epoch": 0.5802893568256696, "grad_norm": 1.1072837114334106, "learning_rate": 7.902279481429675e-05, "loss": 1.3059, "step": 4051 }, { "epoch": 0.5804326027789715, "grad_norm": 1.0482300519943237, "learning_rate": 7.897743164454267e-05, "loss": 1.435, "step": 4052 }, { "epoch": 0.5805758487322733, "grad_norm": 1.0429731607437134, "learning_rate": 7.89320730004274e-05, "loss": 1.428, "step": 4053 }, { "epoch": 0.5807190946855751, "grad_norm": 1.0610629320144653, "learning_rate": 7.888671889171538e-05, "loss": 1.6398, "step": 4054 }, { "epoch": 0.580862340638877, "grad_norm": 1.0603666305541992, "learning_rate": 7.884136932817037e-05, "loss": 1.5053, "step": 4055 }, { "epoch": 0.5810055865921788, "grad_norm": 1.2090955972671509, "learning_rate": 7.879602431955492e-05, "loss": 1.3053, "step": 4056 }, { "epoch": 0.5811488325454806, "grad_norm": 1.138738989830017, "learning_rate": 7.875068387563069e-05, "loss": 1.2628, "step": 4057 }, { "epoch": 0.5812920784987824, "grad_norm": 1.163888692855835, "learning_rate": 7.870534800615845e-05, "loss": 1.2563, "step": 4058 }, { "epoch": 0.5814353244520842, "grad_norm": 1.1937397718429565, "learning_rate": 7.866001672089776e-05, "loss": 1.5258, "step": 4059 }, { "epoch": 0.5815785704053861, "grad_norm": 1.0095622539520264, "learning_rate": 7.861469002960742e-05, "loss": 1.3591, "step": 4060 }, { "epoch": 0.5817218163586879, "grad_norm": 1.1307746171951294, "learning_rate": 7.856936794204513e-05, "loss": 1.4926, "step": 4061 }, { "epoch": 0.5818650623119896, "grad_norm": 1.050833821296692, "learning_rate": 7.852405046796756e-05, "loss": 1.51, "step": 4062 }, { "epoch": 0.5820083082652915, "grad_norm": 1.0171937942504883, "learning_rate": 7.84787376171305e-05, "loss": 1.3694, "step": 4063 }, { "epoch": 0.5821515542185933, "grad_norm": 1.167988896369934, "learning_rate": 7.843342939928864e-05, "loss": 1.4884, "step": 4064 }, { "epoch": 0.5822948001718952, "grad_norm": 1.1452760696411133, "learning_rate": 7.838812582419574e-05, "loss": 1.4827, "step": 4065 }, { "epoch": 0.582438046125197, "grad_norm": 1.161697268486023, "learning_rate": 7.834282690160457e-05, "loss": 1.3815, "step": 4066 }, { "epoch": 0.5825812920784987, "grad_norm": 0.9736148118972778, "learning_rate": 7.829753264126681e-05, "loss": 1.3525, "step": 4067 }, { "epoch": 0.5827245380318006, "grad_norm": 1.1203678846359253, "learning_rate": 7.82522430529332e-05, "loss": 1.4147, "step": 4068 }, { "epoch": 0.5828677839851024, "grad_norm": 1.2531877756118774, "learning_rate": 7.820695814635356e-05, "loss": 1.4253, "step": 4069 }, { "epoch": 0.5830110299384043, "grad_norm": 1.0416511297225952, "learning_rate": 7.816167793127646e-05, "loss": 1.4336, "step": 4070 }, { "epoch": 0.5831542758917061, "grad_norm": 0.962336003780365, "learning_rate": 7.811640241744975e-05, "loss": 1.2869, "step": 4071 }, { "epoch": 0.5832975218450078, "grad_norm": 1.1722517013549805, "learning_rate": 7.807113161462003e-05, "loss": 1.3873, "step": 4072 }, { "epoch": 0.5834407677983097, "grad_norm": 0.9998098611831665, "learning_rate": 7.802586553253301e-05, "loss": 1.4721, "step": 4073 }, { "epoch": 0.5835840137516115, "grad_norm": 0.999030351638794, "learning_rate": 7.79806041809334e-05, "loss": 1.4494, "step": 4074 }, { "epoch": 0.5837272597049133, "grad_norm": 1.364902138710022, "learning_rate": 7.793534756956477e-05, "loss": 1.4415, "step": 4075 }, { "epoch": 0.5838705056582152, "grad_norm": 1.0513964891433716, "learning_rate": 7.789009570816985e-05, "loss": 1.5833, "step": 4076 }, { "epoch": 0.584013751611517, "grad_norm": 1.21407949924469, "learning_rate": 7.784484860649013e-05, "loss": 1.4014, "step": 4077 }, { "epoch": 0.5841569975648188, "grad_norm": 1.1745800971984863, "learning_rate": 7.779960627426627e-05, "loss": 1.5007, "step": 4078 }, { "epoch": 0.5843002435181206, "grad_norm": 1.0718715190887451, "learning_rate": 7.775436872123783e-05, "loss": 1.5661, "step": 4079 }, { "epoch": 0.5844434894714224, "grad_norm": 0.9887749552726746, "learning_rate": 7.770913595714327e-05, "loss": 1.3546, "step": 4080 }, { "epoch": 0.5845867354247243, "grad_norm": 1.1965467929840088, "learning_rate": 7.766390799172012e-05, "loss": 1.3469, "step": 4081 }, { "epoch": 0.5847299813780261, "grad_norm": 1.1297179460525513, "learning_rate": 7.76186848347049e-05, "loss": 1.2296, "step": 4082 }, { "epoch": 0.5848732273313278, "grad_norm": 1.093031406402588, "learning_rate": 7.757346649583294e-05, "loss": 1.5112, "step": 4083 }, { "epoch": 0.5850164732846297, "grad_norm": 1.2159183025360107, "learning_rate": 7.75282529848387e-05, "loss": 1.324, "step": 4084 }, { "epoch": 0.5851597192379315, "grad_norm": 0.9650058746337891, "learning_rate": 7.74830443114555e-05, "loss": 1.5063, "step": 4085 }, { "epoch": 0.5853029651912334, "grad_norm": 1.1315046548843384, "learning_rate": 7.743784048541561e-05, "loss": 1.2774, "step": 4086 }, { "epoch": 0.5854462111445352, "grad_norm": 1.0977623462677002, "learning_rate": 7.739264151645039e-05, "loss": 1.4178, "step": 4087 }, { "epoch": 0.5855894570978369, "grad_norm": 1.045953392982483, "learning_rate": 7.734744741428997e-05, "loss": 1.4494, "step": 4088 }, { "epoch": 0.5857327030511388, "grad_norm": 1.086333990097046, "learning_rate": 7.730225818866358e-05, "loss": 1.435, "step": 4089 }, { "epoch": 0.5858759490044406, "grad_norm": 1.2137300968170166, "learning_rate": 7.725707384929932e-05, "loss": 1.4743, "step": 4090 }, { "epoch": 0.5860191949577425, "grad_norm": 0.9116840362548828, "learning_rate": 7.721189440592423e-05, "loss": 1.4257, "step": 4091 }, { "epoch": 0.5861624409110443, "grad_norm": 0.9345767498016357, "learning_rate": 7.716671986826443e-05, "loss": 1.3673, "step": 4092 }, { "epoch": 0.586305686864346, "grad_norm": 1.1537500619888306, "learning_rate": 7.712155024604473e-05, "loss": 1.4915, "step": 4093 }, { "epoch": 0.5864489328176479, "grad_norm": 0.9780703783035278, "learning_rate": 7.707638554898913e-05, "loss": 1.4099, "step": 4094 }, { "epoch": 0.5865921787709497, "grad_norm": 1.0122650861740112, "learning_rate": 7.703122578682046e-05, "loss": 1.4124, "step": 4095 }, { "epoch": 0.5867354247242516, "grad_norm": 1.1379693746566772, "learning_rate": 7.698607096926048e-05, "loss": 1.4942, "step": 4096 }, { "epoch": 0.5868786706775534, "grad_norm": 0.9861220121383667, "learning_rate": 7.69409211060299e-05, "loss": 1.442, "step": 4097 }, { "epoch": 0.5870219166308551, "grad_norm": 1.0483198165893555, "learning_rate": 7.689577620684842e-05, "loss": 1.5484, "step": 4098 }, { "epoch": 0.587165162584157, "grad_norm": 0.9657735824584961, "learning_rate": 7.685063628143455e-05, "loss": 1.4297, "step": 4099 }, { "epoch": 0.5873084085374588, "grad_norm": 1.0831466913223267, "learning_rate": 7.680550133950586e-05, "loss": 1.4629, "step": 4100 }, { "epoch": 0.5874516544907606, "grad_norm": 0.9783119559288025, "learning_rate": 7.676037139077874e-05, "loss": 1.4518, "step": 4101 }, { "epoch": 0.5875949004440625, "grad_norm": 0.9771605134010315, "learning_rate": 7.671524644496853e-05, "loss": 1.298, "step": 4102 }, { "epoch": 0.5877381463973643, "grad_norm": 1.1136901378631592, "learning_rate": 7.667012651178963e-05, "loss": 1.4304, "step": 4103 }, { "epoch": 0.587881392350666, "grad_norm": 1.159201741218567, "learning_rate": 7.662501160095509e-05, "loss": 1.5095, "step": 4104 }, { "epoch": 0.5880246383039679, "grad_norm": 0.9954947233200073, "learning_rate": 7.657990172217718e-05, "loss": 1.4314, "step": 4105 }, { "epoch": 0.5881678842572697, "grad_norm": 1.3003029823303223, "learning_rate": 7.653479688516683e-05, "loss": 1.3413, "step": 4106 }, { "epoch": 0.5883111302105716, "grad_norm": 1.0706342458724976, "learning_rate": 7.648969709963405e-05, "loss": 1.4444, "step": 4107 }, { "epoch": 0.5884543761638734, "grad_norm": 1.2283451557159424, "learning_rate": 7.644460237528771e-05, "loss": 1.379, "step": 4108 }, { "epoch": 0.5885976221171751, "grad_norm": 1.0265097618103027, "learning_rate": 7.639951272183551e-05, "loss": 1.4598, "step": 4109 }, { "epoch": 0.588740868070477, "grad_norm": 1.102243185043335, "learning_rate": 7.635442814898426e-05, "loss": 1.4984, "step": 4110 }, { "epoch": 0.5888841140237788, "grad_norm": 1.187455177307129, "learning_rate": 7.630934866643949e-05, "loss": 1.3111, "step": 4111 }, { "epoch": 0.5890273599770807, "grad_norm": 1.2127901315689087, "learning_rate": 7.626427428390567e-05, "loss": 1.3828, "step": 4112 }, { "epoch": 0.5891706059303825, "grad_norm": 1.2042181491851807, "learning_rate": 7.621920501108627e-05, "loss": 1.4411, "step": 4113 }, { "epoch": 0.5893138518836842, "grad_norm": 1.066982388496399, "learning_rate": 7.617414085768351e-05, "loss": 1.3189, "step": 4114 }, { "epoch": 0.5894570978369861, "grad_norm": 1.051342487335205, "learning_rate": 7.612908183339862e-05, "loss": 1.3499, "step": 4115 }, { "epoch": 0.5896003437902879, "grad_norm": 1.2656009197235107, "learning_rate": 7.608402794793174e-05, "loss": 1.4651, "step": 4116 }, { "epoch": 0.5897435897435898, "grad_norm": 0.9503814578056335, "learning_rate": 7.603897921098177e-05, "loss": 1.5977, "step": 4117 }, { "epoch": 0.5898868356968916, "grad_norm": 1.4044922590255737, "learning_rate": 7.599393563224666e-05, "loss": 1.5585, "step": 4118 }, { "epoch": 0.5900300816501933, "grad_norm": 0.955354630947113, "learning_rate": 7.594889722142316e-05, "loss": 1.4689, "step": 4119 }, { "epoch": 0.5901733276034952, "grad_norm": 1.0804277658462524, "learning_rate": 7.590386398820687e-05, "loss": 1.398, "step": 4120 }, { "epoch": 0.590316573556797, "grad_norm": 1.2909722328186035, "learning_rate": 7.585883594229243e-05, "loss": 1.4093, "step": 4121 }, { "epoch": 0.5904598195100988, "grad_norm": 1.1141750812530518, "learning_rate": 7.581381309337318e-05, "loss": 1.5269, "step": 4122 }, { "epoch": 0.5906030654634007, "grad_norm": 0.9842514395713806, "learning_rate": 7.576879545114145e-05, "loss": 1.4431, "step": 4123 }, { "epoch": 0.5907463114167025, "grad_norm": 1.1852399110794067, "learning_rate": 7.572378302528847e-05, "loss": 1.512, "step": 4124 }, { "epoch": 0.5908895573700043, "grad_norm": 1.1678013801574707, "learning_rate": 7.56787758255042e-05, "loss": 1.3506, "step": 4125 }, { "epoch": 0.5910328033233061, "grad_norm": 0.9724790453910828, "learning_rate": 7.56337738614777e-05, "loss": 1.4405, "step": 4126 }, { "epoch": 0.5911760492766079, "grad_norm": 1.127126693725586, "learning_rate": 7.558877714289667e-05, "loss": 1.3432, "step": 4127 }, { "epoch": 0.5913192952299098, "grad_norm": 0.8662317991256714, "learning_rate": 7.554378567944786e-05, "loss": 1.3631, "step": 4128 }, { "epoch": 0.5914625411832116, "grad_norm": 1.2070472240447998, "learning_rate": 7.549879948081681e-05, "loss": 1.6516, "step": 4129 }, { "epoch": 0.5916057871365134, "grad_norm": 1.0595580339431763, "learning_rate": 7.54538185566879e-05, "loss": 1.469, "step": 4130 }, { "epoch": 0.5917490330898152, "grad_norm": 0.9995733499526978, "learning_rate": 7.54088429167444e-05, "loss": 1.4044, "step": 4131 }, { "epoch": 0.591892279043117, "grad_norm": 1.281429648399353, "learning_rate": 7.536387257066854e-05, "loss": 1.2951, "step": 4132 }, { "epoch": 0.5920355249964189, "grad_norm": 1.0896403789520264, "learning_rate": 7.531890752814123e-05, "loss": 1.4317, "step": 4133 }, { "epoch": 0.5921787709497207, "grad_norm": 1.200994849205017, "learning_rate": 7.52739477988424e-05, "loss": 1.5888, "step": 4134 }, { "epoch": 0.5923220169030224, "grad_norm": 1.0673589706420898, "learning_rate": 7.52289933924507e-05, "loss": 1.4086, "step": 4135 }, { "epoch": 0.5924652628563243, "grad_norm": 1.1783102750778198, "learning_rate": 7.518404431864373e-05, "loss": 1.297, "step": 4136 }, { "epoch": 0.5926085088096261, "grad_norm": 0.9957106709480286, "learning_rate": 7.513910058709798e-05, "loss": 1.5496, "step": 4137 }, { "epoch": 0.592751754762928, "grad_norm": 1.2256033420562744, "learning_rate": 7.50941622074886e-05, "loss": 1.3981, "step": 4138 }, { "epoch": 0.5928950007162298, "grad_norm": 0.9200305342674255, "learning_rate": 7.504922918948978e-05, "loss": 1.4156, "step": 4139 }, { "epoch": 0.5930382466695315, "grad_norm": 1.1790897846221924, "learning_rate": 7.500430154277452e-05, "loss": 1.4273, "step": 4140 }, { "epoch": 0.5931814926228334, "grad_norm": 1.0630582571029663, "learning_rate": 7.495937927701455e-05, "loss": 1.3231, "step": 4141 }, { "epoch": 0.5933247385761352, "grad_norm": 1.1541399955749512, "learning_rate": 7.49144624018806e-05, "loss": 1.3764, "step": 4142 }, { "epoch": 0.593467984529437, "grad_norm": 1.188480257987976, "learning_rate": 7.486955092704207e-05, "loss": 1.495, "step": 4143 }, { "epoch": 0.5936112304827389, "grad_norm": 1.1780165433883667, "learning_rate": 7.482464486216737e-05, "loss": 1.262, "step": 4144 }, { "epoch": 0.5937544764360407, "grad_norm": 1.0949242115020752, "learning_rate": 7.477974421692367e-05, "loss": 1.3558, "step": 4145 }, { "epoch": 0.5938977223893425, "grad_norm": 1.138108730316162, "learning_rate": 7.47348490009769e-05, "loss": 1.3837, "step": 4146 }, { "epoch": 0.5940409683426443, "grad_norm": 0.995722770690918, "learning_rate": 7.468995922399189e-05, "loss": 1.5003, "step": 4147 }, { "epoch": 0.5941842142959461, "grad_norm": 1.2390016317367554, "learning_rate": 7.464507489563242e-05, "loss": 1.5789, "step": 4148 }, { "epoch": 0.594327460249248, "grad_norm": 1.035523772239685, "learning_rate": 7.460019602556081e-05, "loss": 1.5099, "step": 4149 }, { "epoch": 0.5944707062025498, "grad_norm": 1.2457205057144165, "learning_rate": 7.45553226234385e-05, "loss": 1.3649, "step": 4150 }, { "epoch": 0.5946139521558516, "grad_norm": 1.1202598810195923, "learning_rate": 7.451045469892554e-05, "loss": 1.262, "step": 4151 }, { "epoch": 0.5947571981091534, "grad_norm": 1.224773645401001, "learning_rate": 7.44655922616809e-05, "loss": 1.3971, "step": 4152 }, { "epoch": 0.5949004440624552, "grad_norm": 1.2140729427337646, "learning_rate": 7.442073532136244e-05, "loss": 1.5063, "step": 4153 }, { "epoch": 0.5950436900157571, "grad_norm": 0.8836366534233093, "learning_rate": 7.43758838876266e-05, "loss": 1.4853, "step": 4154 }, { "epoch": 0.5951869359690589, "grad_norm": 1.0752472877502441, "learning_rate": 7.433103797012892e-05, "loss": 1.456, "step": 4155 }, { "epoch": 0.5953301819223606, "grad_norm": 1.1957483291625977, "learning_rate": 7.428619757852352e-05, "loss": 1.5008, "step": 4156 }, { "epoch": 0.5954734278756625, "grad_norm": 1.1404597759246826, "learning_rate": 7.424136272246347e-05, "loss": 1.4522, "step": 4157 }, { "epoch": 0.5956166738289643, "grad_norm": 1.217352271080017, "learning_rate": 7.419653341160062e-05, "loss": 1.4309, "step": 4158 }, { "epoch": 0.5957599197822662, "grad_norm": 1.3097559213638306, "learning_rate": 7.415170965558553e-05, "loss": 1.4148, "step": 4159 }, { "epoch": 0.595903165735568, "grad_norm": 1.1112788915634155, "learning_rate": 7.410689146406775e-05, "loss": 1.3385, "step": 4160 }, { "epoch": 0.5960464116888697, "grad_norm": 1.4310517311096191, "learning_rate": 7.40620788466955e-05, "loss": 1.3791, "step": 4161 }, { "epoch": 0.5961896576421716, "grad_norm": 1.0071412324905396, "learning_rate": 7.401727181311578e-05, "loss": 1.4749, "step": 4162 }, { "epoch": 0.5963329035954734, "grad_norm": 1.010916829109192, "learning_rate": 7.397247037297449e-05, "loss": 1.386, "step": 4163 }, { "epoch": 0.5964761495487753, "grad_norm": 1.0271905660629272, "learning_rate": 7.392767453591623e-05, "loss": 1.3405, "step": 4164 }, { "epoch": 0.5966193955020771, "grad_norm": 1.1605325937271118, "learning_rate": 7.388288431158443e-05, "loss": 1.43, "step": 4165 }, { "epoch": 0.5967626414553789, "grad_norm": 1.067371129989624, "learning_rate": 7.383809970962137e-05, "loss": 1.422, "step": 4166 }, { "epoch": 0.5969058874086807, "grad_norm": 1.1182775497436523, "learning_rate": 7.379332073966804e-05, "loss": 1.1895, "step": 4167 }, { "epoch": 0.5970491333619825, "grad_norm": 1.0228168964385986, "learning_rate": 7.374854741136422e-05, "loss": 1.5937, "step": 4168 }, { "epoch": 0.5971923793152843, "grad_norm": 1.2450546026229858, "learning_rate": 7.370377973434855e-05, "loss": 1.3985, "step": 4169 }, { "epoch": 0.5973356252685862, "grad_norm": 1.225685477256775, "learning_rate": 7.365901771825833e-05, "loss": 1.5442, "step": 4170 }, { "epoch": 0.597478871221888, "grad_norm": 1.0614421367645264, "learning_rate": 7.361426137272982e-05, "loss": 1.4296, "step": 4171 }, { "epoch": 0.5976221171751898, "grad_norm": 1.2749584913253784, "learning_rate": 7.356951070739781e-05, "loss": 1.2497, "step": 4172 }, { "epoch": 0.5977653631284916, "grad_norm": 1.076539158821106, "learning_rate": 7.352476573189614e-05, "loss": 1.5376, "step": 4173 }, { "epoch": 0.5979086090817934, "grad_norm": 1.1491841077804565, "learning_rate": 7.348002645585725e-05, "loss": 1.3928, "step": 4174 }, { "epoch": 0.5980518550350953, "grad_norm": 1.2570098638534546, "learning_rate": 7.343529288891239e-05, "loss": 1.2062, "step": 4175 }, { "epoch": 0.5981951009883971, "grad_norm": 1.2708749771118164, "learning_rate": 7.33905650406916e-05, "loss": 1.3741, "step": 4176 }, { "epoch": 0.5983383469416989, "grad_norm": 1.116259217262268, "learning_rate": 7.334584292082365e-05, "loss": 1.4933, "step": 4177 }, { "epoch": 0.5984815928950007, "grad_norm": 1.0042681694030762, "learning_rate": 7.330112653893614e-05, "loss": 1.5124, "step": 4178 }, { "epoch": 0.5986248388483025, "grad_norm": 1.110458493232727, "learning_rate": 7.325641590465542e-05, "loss": 1.4164, "step": 4179 }, { "epoch": 0.5987680848016044, "grad_norm": 1.1291543245315552, "learning_rate": 7.321171102760653e-05, "loss": 1.4849, "step": 4180 }, { "epoch": 0.5989113307549062, "grad_norm": 1.1526830196380615, "learning_rate": 7.316701191741333e-05, "loss": 1.3997, "step": 4181 }, { "epoch": 0.5990545767082079, "grad_norm": 1.0835102796554565, "learning_rate": 7.31223185836985e-05, "loss": 1.5266, "step": 4182 }, { "epoch": 0.5991978226615098, "grad_norm": 1.0526680946350098, "learning_rate": 7.307763103608332e-05, "loss": 1.5701, "step": 4183 }, { "epoch": 0.5993410686148116, "grad_norm": 1.0574432611465454, "learning_rate": 7.303294928418799e-05, "loss": 1.3117, "step": 4184 }, { "epoch": 0.5994843145681135, "grad_norm": 1.101569652557373, "learning_rate": 7.298827333763132e-05, "loss": 1.5238, "step": 4185 }, { "epoch": 0.5996275605214153, "grad_norm": 1.1604955196380615, "learning_rate": 7.294360320603095e-05, "loss": 1.6361, "step": 4186 }, { "epoch": 0.5997708064747171, "grad_norm": 1.101251244544983, "learning_rate": 7.289893889900332e-05, "loss": 1.2875, "step": 4187 }, { "epoch": 0.5999140524280189, "grad_norm": 1.1338562965393066, "learning_rate": 7.285428042616344e-05, "loss": 1.433, "step": 4188 }, { "epoch": 0.6000572983813207, "grad_norm": 1.2323518991470337, "learning_rate": 7.280962779712525e-05, "loss": 1.4939, "step": 4189 }, { "epoch": 0.6002005443346226, "grad_norm": 1.0256216526031494, "learning_rate": 7.276498102150138e-05, "loss": 1.3425, "step": 4190 }, { "epoch": 0.6003437902879244, "grad_norm": 0.9391832947731018, "learning_rate": 7.272034010890309e-05, "loss": 1.555, "step": 4191 }, { "epoch": 0.6004870362412262, "grad_norm": 1.264958143234253, "learning_rate": 7.267570506894052e-05, "loss": 1.3776, "step": 4192 }, { "epoch": 0.600630282194528, "grad_norm": 0.943480908870697, "learning_rate": 7.263107591122246e-05, "loss": 1.4483, "step": 4193 }, { "epoch": 0.6007735281478298, "grad_norm": 1.3064343929290771, "learning_rate": 7.258645264535649e-05, "loss": 1.5057, "step": 4194 }, { "epoch": 0.6009167741011316, "grad_norm": 1.0172686576843262, "learning_rate": 7.254183528094891e-05, "loss": 1.3136, "step": 4195 }, { "epoch": 0.6010600200544335, "grad_norm": 1.185977816581726, "learning_rate": 7.249722382760469e-05, "loss": 1.395, "step": 4196 }, { "epoch": 0.6012032660077353, "grad_norm": 0.9948939085006714, "learning_rate": 7.245261829492755e-05, "loss": 1.404, "step": 4197 }, { "epoch": 0.6013465119610371, "grad_norm": 1.046208381652832, "learning_rate": 7.24080186925201e-05, "loss": 1.5871, "step": 4198 }, { "epoch": 0.6014897579143389, "grad_norm": 1.0895016193389893, "learning_rate": 7.236342502998335e-05, "loss": 1.5129, "step": 4199 }, { "epoch": 0.6016330038676407, "grad_norm": 1.0407259464263916, "learning_rate": 7.231883731691732e-05, "loss": 1.7093, "step": 4200 }, { "epoch": 0.6017762498209426, "grad_norm": 1.0808143615722656, "learning_rate": 7.22742555629206e-05, "loss": 1.3784, "step": 4201 }, { "epoch": 0.6019194957742444, "grad_norm": 1.1595443487167358, "learning_rate": 7.222967977759056e-05, "loss": 1.4016, "step": 4202 }, { "epoch": 0.6020627417275461, "grad_norm": 0.9473376274108887, "learning_rate": 7.218510997052326e-05, "loss": 1.3154, "step": 4203 }, { "epoch": 0.602205987680848, "grad_norm": 1.0887020826339722, "learning_rate": 7.214054615131345e-05, "loss": 1.2286, "step": 4204 }, { "epoch": 0.6023492336341498, "grad_norm": 1.2252492904663086, "learning_rate": 7.209598832955469e-05, "loss": 1.4846, "step": 4205 }, { "epoch": 0.6024924795874517, "grad_norm": 0.9583574533462524, "learning_rate": 7.205143651483906e-05, "loss": 1.406, "step": 4206 }, { "epoch": 0.6026357255407535, "grad_norm": 1.072060227394104, "learning_rate": 7.200689071675755e-05, "loss": 1.4619, "step": 4207 }, { "epoch": 0.6027789714940552, "grad_norm": 1.3092440366744995, "learning_rate": 7.196235094489978e-05, "loss": 1.3726, "step": 4208 }, { "epoch": 0.6029222174473571, "grad_norm": 1.1472665071487427, "learning_rate": 7.1917817208854e-05, "loss": 1.4685, "step": 4209 }, { "epoch": 0.6030654634006589, "grad_norm": 0.9766077995300293, "learning_rate": 7.187328951820723e-05, "loss": 1.4257, "step": 4210 }, { "epoch": 0.6032087093539608, "grad_norm": 1.1146196126937866, "learning_rate": 7.182876788254525e-05, "loss": 1.5676, "step": 4211 }, { "epoch": 0.6033519553072626, "grad_norm": 0.9668264985084534, "learning_rate": 7.178425231145236e-05, "loss": 1.2464, "step": 4212 }, { "epoch": 0.6034952012605644, "grad_norm": 1.0392547845840454, "learning_rate": 7.173974281451176e-05, "loss": 1.6053, "step": 4213 }, { "epoch": 0.6036384472138662, "grad_norm": 1.0427402257919312, "learning_rate": 7.169523940130519e-05, "loss": 1.3612, "step": 4214 }, { "epoch": 0.603781693167168, "grad_norm": 0.9183353781700134, "learning_rate": 7.16507420814131e-05, "loss": 1.6304, "step": 4215 }, { "epoch": 0.6039249391204699, "grad_norm": 1.1151803731918335, "learning_rate": 7.160625086441476e-05, "loss": 1.5209, "step": 4216 }, { "epoch": 0.6040681850737717, "grad_norm": 1.2650302648544312, "learning_rate": 7.156176575988794e-05, "loss": 1.2602, "step": 4217 }, { "epoch": 0.6042114310270735, "grad_norm": 1.0805094242095947, "learning_rate": 7.151728677740923e-05, "loss": 1.2032, "step": 4218 }, { "epoch": 0.6043546769803753, "grad_norm": 0.9683591723442078, "learning_rate": 7.147281392655385e-05, "loss": 1.5932, "step": 4219 }, { "epoch": 0.6044979229336771, "grad_norm": 1.0664361715316772, "learning_rate": 7.142834721689565e-05, "loss": 1.534, "step": 4220 }, { "epoch": 0.6046411688869789, "grad_norm": 1.0504882335662842, "learning_rate": 7.138388665800733e-05, "loss": 1.3112, "step": 4221 }, { "epoch": 0.6047844148402808, "grad_norm": 1.1913233995437622, "learning_rate": 7.133943225946e-05, "loss": 1.2581, "step": 4222 }, { "epoch": 0.6049276607935826, "grad_norm": 1.0433645248413086, "learning_rate": 7.129498403082369e-05, "loss": 1.4469, "step": 4223 }, { "epoch": 0.6050709067468844, "grad_norm": 1.1232019662857056, "learning_rate": 7.125054198166701e-05, "loss": 1.4002, "step": 4224 }, { "epoch": 0.6052141527001862, "grad_norm": 1.151464581489563, "learning_rate": 7.120610612155716e-05, "loss": 1.5652, "step": 4225 }, { "epoch": 0.605357398653488, "grad_norm": 0.8970821499824524, "learning_rate": 7.11616764600601e-05, "loss": 1.7904, "step": 4226 }, { "epoch": 0.6055006446067899, "grad_norm": 1.2024282217025757, "learning_rate": 7.111725300674052e-05, "loss": 1.4129, "step": 4227 }, { "epoch": 0.6056438905600917, "grad_norm": 1.0418825149536133, "learning_rate": 7.107283577116161e-05, "loss": 1.3158, "step": 4228 }, { "epoch": 0.6057871365133934, "grad_norm": 1.041850209236145, "learning_rate": 7.102842476288534e-05, "loss": 1.4392, "step": 4229 }, { "epoch": 0.6059303824666953, "grad_norm": 1.0541305541992188, "learning_rate": 7.098401999147226e-05, "loss": 1.3479, "step": 4230 }, { "epoch": 0.6060736284199971, "grad_norm": 0.9854674339294434, "learning_rate": 7.093962146648164e-05, "loss": 1.5298, "step": 4231 }, { "epoch": 0.606216874373299, "grad_norm": 1.0490758419036865, "learning_rate": 7.089522919747142e-05, "loss": 1.5571, "step": 4232 }, { "epoch": 0.6063601203266008, "grad_norm": 1.1308553218841553, "learning_rate": 7.085084319399808e-05, "loss": 1.4666, "step": 4233 }, { "epoch": 0.6065033662799026, "grad_norm": 1.142994999885559, "learning_rate": 7.08064634656169e-05, "loss": 1.308, "step": 4234 }, { "epoch": 0.6066466122332044, "grad_norm": 1.0846341848373413, "learning_rate": 7.076209002188168e-05, "loss": 1.3641, "step": 4235 }, { "epoch": 0.6067898581865062, "grad_norm": 1.8414368629455566, "learning_rate": 7.071772287234497e-05, "loss": 1.5357, "step": 4236 }, { "epoch": 0.606933104139808, "grad_norm": 1.0243433713912964, "learning_rate": 7.067336202655792e-05, "loss": 1.5019, "step": 4237 }, { "epoch": 0.6070763500931099, "grad_norm": 1.1277682781219482, "learning_rate": 7.062900749407026e-05, "loss": 1.2696, "step": 4238 }, { "epoch": 0.6072195960464117, "grad_norm": 1.0590330362319946, "learning_rate": 7.058465928443048e-05, "loss": 1.5483, "step": 4239 }, { "epoch": 0.6073628419997135, "grad_norm": 1.019850730895996, "learning_rate": 7.054031740718567e-05, "loss": 1.4716, "step": 4240 }, { "epoch": 0.6075060879530153, "grad_norm": 0.9222798943519592, "learning_rate": 7.049598187188148e-05, "loss": 1.4367, "step": 4241 }, { "epoch": 0.6076493339063171, "grad_norm": 0.9350789189338684, "learning_rate": 7.045165268806231e-05, "loss": 1.39, "step": 4242 }, { "epoch": 0.607792579859619, "grad_norm": 1.0299463272094727, "learning_rate": 7.040732986527108e-05, "loss": 1.4142, "step": 4243 }, { "epoch": 0.6079358258129208, "grad_norm": 1.1178940534591675, "learning_rate": 7.03630134130494e-05, "loss": 1.3787, "step": 4244 }, { "epoch": 0.6080790717662226, "grad_norm": 0.9920247197151184, "learning_rate": 7.03187033409376e-05, "loss": 1.3926, "step": 4245 }, { "epoch": 0.6082223177195244, "grad_norm": 1.0793055295944214, "learning_rate": 7.027439965847442e-05, "loss": 1.4906, "step": 4246 }, { "epoch": 0.6083655636728262, "grad_norm": 1.327938437461853, "learning_rate": 7.023010237519739e-05, "loss": 1.3738, "step": 4247 }, { "epoch": 0.6085088096261281, "grad_norm": 0.9713819622993469, "learning_rate": 7.018581150064269e-05, "loss": 1.5806, "step": 4248 }, { "epoch": 0.6086520555794299, "grad_norm": 1.1489806175231934, "learning_rate": 7.014152704434494e-05, "loss": 1.3484, "step": 4249 }, { "epoch": 0.6087953015327316, "grad_norm": 0.9832737445831299, "learning_rate": 7.009724901583755e-05, "loss": 1.3335, "step": 4250 }, { "epoch": 0.6089385474860335, "grad_norm": 1.181135892868042, "learning_rate": 7.005297742465247e-05, "loss": 1.3087, "step": 4251 }, { "epoch": 0.6090817934393353, "grad_norm": 1.2180101871490479, "learning_rate": 7.000871228032027e-05, "loss": 1.3228, "step": 4252 }, { "epoch": 0.6092250393926372, "grad_norm": 0.8919790387153625, "learning_rate": 6.996445359237016e-05, "loss": 1.5048, "step": 4253 }, { "epoch": 0.609368285345939, "grad_norm": 0.9915915131568909, "learning_rate": 6.992020137032988e-05, "loss": 1.5087, "step": 4254 }, { "epoch": 0.6095115312992408, "grad_norm": 1.1417628526687622, "learning_rate": 6.987595562372596e-05, "loss": 1.35, "step": 4255 }, { "epoch": 0.6096547772525426, "grad_norm": 1.0542162656784058, "learning_rate": 6.983171636208328e-05, "loss": 1.4884, "step": 4256 }, { "epoch": 0.6097980232058444, "grad_norm": 1.1912367343902588, "learning_rate": 6.978748359492553e-05, "loss": 1.5181, "step": 4257 }, { "epoch": 0.6099412691591463, "grad_norm": 1.161149024963379, "learning_rate": 6.974325733177495e-05, "loss": 1.4194, "step": 4258 }, { "epoch": 0.6100845151124481, "grad_norm": 1.0689343214035034, "learning_rate": 6.96990375821523e-05, "loss": 1.3595, "step": 4259 }, { "epoch": 0.6102277610657499, "grad_norm": 1.454522967338562, "learning_rate": 6.9654824355577e-05, "loss": 1.2879, "step": 4260 }, { "epoch": 0.6103710070190517, "grad_norm": 1.1314560174942017, "learning_rate": 6.961061766156715e-05, "loss": 1.5118, "step": 4261 }, { "epoch": 0.6105142529723535, "grad_norm": 0.9639393091201782, "learning_rate": 6.956641750963927e-05, "loss": 1.4141, "step": 4262 }, { "epoch": 0.6106574989256554, "grad_norm": 1.0022913217544556, "learning_rate": 6.952222390930858e-05, "loss": 1.431, "step": 4263 }, { "epoch": 0.6108007448789572, "grad_norm": 0.959291934967041, "learning_rate": 6.947803687008888e-05, "loss": 1.5013, "step": 4264 }, { "epoch": 0.610943990832259, "grad_norm": 1.1029284000396729, "learning_rate": 6.943385640149251e-05, "loss": 1.4265, "step": 4265 }, { "epoch": 0.6110872367855608, "grad_norm": 1.052175521850586, "learning_rate": 6.938968251303053e-05, "loss": 1.4451, "step": 4266 }, { "epoch": 0.6112304827388626, "grad_norm": 1.0712398290634155, "learning_rate": 6.934551521421235e-05, "loss": 1.3255, "step": 4267 }, { "epoch": 0.6113737286921644, "grad_norm": 0.9968187212944031, "learning_rate": 6.93013545145462e-05, "loss": 1.4126, "step": 4268 }, { "epoch": 0.6115169746454663, "grad_norm": 1.038417935371399, "learning_rate": 6.925720042353876e-05, "loss": 1.6231, "step": 4269 }, { "epoch": 0.6116602205987681, "grad_norm": 1.0528451204299927, "learning_rate": 6.921305295069528e-05, "loss": 1.4078, "step": 4270 }, { "epoch": 0.6118034665520699, "grad_norm": 1.3477317094802856, "learning_rate": 6.916891210551965e-05, "loss": 1.192, "step": 4271 }, { "epoch": 0.6119467125053717, "grad_norm": 1.0575848817825317, "learning_rate": 6.912477789751426e-05, "loss": 1.2557, "step": 4272 }, { "epoch": 0.6120899584586735, "grad_norm": 1.0288550853729248, "learning_rate": 6.908065033618018e-05, "loss": 1.54, "step": 4273 }, { "epoch": 0.6122332044119754, "grad_norm": 1.1299831867218018, "learning_rate": 6.903652943101697e-05, "loss": 1.3998, "step": 4274 }, { "epoch": 0.6123764503652772, "grad_norm": 0.9560255408287048, "learning_rate": 6.89924151915227e-05, "loss": 1.408, "step": 4275 }, { "epoch": 0.612519696318579, "grad_norm": 0.9701712131500244, "learning_rate": 6.894830762719411e-05, "loss": 1.4396, "step": 4276 }, { "epoch": 0.6126629422718808, "grad_norm": 1.0037755966186523, "learning_rate": 6.890420674752653e-05, "loss": 1.4684, "step": 4277 }, { "epoch": 0.6128061882251826, "grad_norm": 1.1921309232711792, "learning_rate": 6.886011256201371e-05, "loss": 1.4515, "step": 4278 }, { "epoch": 0.6129494341784845, "grad_norm": 1.0522853136062622, "learning_rate": 6.881602508014808e-05, "loss": 1.492, "step": 4279 }, { "epoch": 0.6130926801317863, "grad_norm": 1.0163192749023438, "learning_rate": 6.877194431142055e-05, "loss": 1.5184, "step": 4280 }, { "epoch": 0.6132359260850881, "grad_norm": 0.9095003604888916, "learning_rate": 6.872787026532062e-05, "loss": 1.447, "step": 4281 }, { "epoch": 0.6133791720383899, "grad_norm": 1.3466076850891113, "learning_rate": 6.868380295133641e-05, "loss": 1.2762, "step": 4282 }, { "epoch": 0.6135224179916917, "grad_norm": 1.1882424354553223, "learning_rate": 6.86397423789544e-05, "loss": 1.4759, "step": 4283 }, { "epoch": 0.6136656639449936, "grad_norm": 1.255839228630066, "learning_rate": 6.859568855765985e-05, "loss": 1.4593, "step": 4284 }, { "epoch": 0.6138089098982954, "grad_norm": 1.0026546716690063, "learning_rate": 6.855164149693641e-05, "loss": 1.3282, "step": 4285 }, { "epoch": 0.6139521558515972, "grad_norm": 1.0240823030471802, "learning_rate": 6.850760120626633e-05, "loss": 1.3893, "step": 4286 }, { "epoch": 0.614095401804899, "grad_norm": 1.1057519912719727, "learning_rate": 6.84635676951304e-05, "loss": 1.4381, "step": 4287 }, { "epoch": 0.6142386477582008, "grad_norm": 0.9514822363853455, "learning_rate": 6.841954097300791e-05, "loss": 1.4008, "step": 4288 }, { "epoch": 0.6143818937115026, "grad_norm": 1.039219617843628, "learning_rate": 6.837552104937679e-05, "loss": 1.4591, "step": 4289 }, { "epoch": 0.6145251396648045, "grad_norm": 1.2077304124832153, "learning_rate": 6.833150793371341e-05, "loss": 1.2696, "step": 4290 }, { "epoch": 0.6146683856181063, "grad_norm": 0.9501237869262695, "learning_rate": 6.828750163549267e-05, "loss": 1.4338, "step": 4291 }, { "epoch": 0.6148116315714081, "grad_norm": 0.8435130715370178, "learning_rate": 6.824350216418808e-05, "loss": 1.3838, "step": 4292 }, { "epoch": 0.6149548775247099, "grad_norm": 0.9703522324562073, "learning_rate": 6.819950952927161e-05, "loss": 1.2938, "step": 4293 }, { "epoch": 0.6150981234780117, "grad_norm": 1.0569028854370117, "learning_rate": 6.815552374021378e-05, "loss": 1.4936, "step": 4294 }, { "epoch": 0.6152413694313136, "grad_norm": 0.9793410897254944, "learning_rate": 6.811154480648371e-05, "loss": 1.5457, "step": 4295 }, { "epoch": 0.6153846153846154, "grad_norm": 1.0446767807006836, "learning_rate": 6.80675727375489e-05, "loss": 1.5509, "step": 4296 }, { "epoch": 0.6155278613379173, "grad_norm": 1.0542500019073486, "learning_rate": 6.802360754287547e-05, "loss": 1.3835, "step": 4297 }, { "epoch": 0.615671107291219, "grad_norm": 1.2045929431915283, "learning_rate": 6.797964923192807e-05, "loss": 1.2305, "step": 4298 }, { "epoch": 0.6158143532445208, "grad_norm": 1.0417587757110596, "learning_rate": 6.793569781416978e-05, "loss": 1.4487, "step": 4299 }, { "epoch": 0.6159575991978227, "grad_norm": 1.017083764076233, "learning_rate": 6.789175329906232e-05, "loss": 1.5643, "step": 4300 }, { "epoch": 0.6161008451511245, "grad_norm": 1.3205528259277344, "learning_rate": 6.784781569606576e-05, "loss": 1.3747, "step": 4301 }, { "epoch": 0.6162440911044264, "grad_norm": 1.064139723777771, "learning_rate": 6.780388501463887e-05, "loss": 1.3241, "step": 4302 }, { "epoch": 0.6163873370577281, "grad_norm": 1.2266569137573242, "learning_rate": 6.775996126423882e-05, "loss": 1.3339, "step": 4303 }, { "epoch": 0.6165305830110299, "grad_norm": 1.1888982057571411, "learning_rate": 6.771604445432127e-05, "loss": 1.4214, "step": 4304 }, { "epoch": 0.6166738289643318, "grad_norm": 1.0135010480880737, "learning_rate": 6.767213459434047e-05, "loss": 1.43, "step": 4305 }, { "epoch": 0.6168170749176336, "grad_norm": 1.1999993324279785, "learning_rate": 6.762823169374906e-05, "loss": 1.3565, "step": 4306 }, { "epoch": 0.6169603208709354, "grad_norm": 1.078255295753479, "learning_rate": 6.758433576199832e-05, "loss": 1.5479, "step": 4307 }, { "epoch": 0.6171035668242372, "grad_norm": 1.3791784048080444, "learning_rate": 6.754044680853794e-05, "loss": 1.4519, "step": 4308 }, { "epoch": 0.617246812777539, "grad_norm": 0.9597680568695068, "learning_rate": 6.749656484281608e-05, "loss": 1.3832, "step": 4309 }, { "epoch": 0.6173900587308409, "grad_norm": 1.0660167932510376, "learning_rate": 6.745268987427946e-05, "loss": 1.4179, "step": 4310 }, { "epoch": 0.6175333046841427, "grad_norm": 0.9361695051193237, "learning_rate": 6.740882191237334e-05, "loss": 1.5645, "step": 4311 }, { "epoch": 0.6176765506374445, "grad_norm": 1.1283692121505737, "learning_rate": 6.736496096654134e-05, "loss": 1.2424, "step": 4312 }, { "epoch": 0.6178197965907463, "grad_norm": 1.091731309890747, "learning_rate": 6.732110704622564e-05, "loss": 1.4349, "step": 4313 }, { "epoch": 0.6179630425440481, "grad_norm": 0.9673663973808289, "learning_rate": 6.727726016086693e-05, "loss": 1.4077, "step": 4314 }, { "epoch": 0.6181062884973499, "grad_norm": 0.9439156651496887, "learning_rate": 6.723342031990431e-05, "loss": 1.4306, "step": 4315 }, { "epoch": 0.6182495344506518, "grad_norm": 0.9377850294113159, "learning_rate": 6.71895875327755e-05, "loss": 1.2805, "step": 4316 }, { "epoch": 0.6183927804039536, "grad_norm": 1.2535158395767212, "learning_rate": 6.714576180891654e-05, "loss": 1.4505, "step": 4317 }, { "epoch": 0.6185360263572554, "grad_norm": 0.9291293025016785, "learning_rate": 6.710194315776203e-05, "loss": 1.5022, "step": 4318 }, { "epoch": 0.6186792723105572, "grad_norm": 1.030538558959961, "learning_rate": 6.705813158874509e-05, "loss": 1.4603, "step": 4319 }, { "epoch": 0.618822518263859, "grad_norm": 1.0712991952896118, "learning_rate": 6.70143271112972e-05, "loss": 1.2475, "step": 4320 }, { "epoch": 0.6189657642171609, "grad_norm": 1.2680820226669312, "learning_rate": 6.697052973484845e-05, "loss": 1.2832, "step": 4321 }, { "epoch": 0.6191090101704627, "grad_norm": 1.0325183868408203, "learning_rate": 6.692673946882727e-05, "loss": 1.4727, "step": 4322 }, { "epoch": 0.6192522561237646, "grad_norm": 1.106132984161377, "learning_rate": 6.688295632266064e-05, "loss": 1.4177, "step": 4323 }, { "epoch": 0.6193955020770663, "grad_norm": 1.0620390176773071, "learning_rate": 6.683918030577402e-05, "loss": 1.4906, "step": 4324 }, { "epoch": 0.6195387480303681, "grad_norm": 1.1507896184921265, "learning_rate": 6.679541142759126e-05, "loss": 1.4685, "step": 4325 }, { "epoch": 0.61968199398367, "grad_norm": 1.0380597114562988, "learning_rate": 6.675164969753472e-05, "loss": 1.3283, "step": 4326 }, { "epoch": 0.6198252399369718, "grad_norm": 1.0493804216384888, "learning_rate": 6.670789512502527e-05, "loss": 1.4178, "step": 4327 }, { "epoch": 0.6199684858902736, "grad_norm": 1.0655943155288696, "learning_rate": 6.666414771948211e-05, "loss": 1.355, "step": 4328 }, { "epoch": 0.6201117318435754, "grad_norm": 0.961478054523468, "learning_rate": 6.662040749032303e-05, "loss": 1.5201, "step": 4329 }, { "epoch": 0.6202549777968772, "grad_norm": 1.170172095298767, "learning_rate": 6.65766744469642e-05, "loss": 1.5383, "step": 4330 }, { "epoch": 0.6203982237501791, "grad_norm": 1.3746247291564941, "learning_rate": 6.653294859882027e-05, "loss": 1.1033, "step": 4331 }, { "epoch": 0.6205414697034809, "grad_norm": 1.3051588535308838, "learning_rate": 6.648922995530433e-05, "loss": 1.4448, "step": 4332 }, { "epoch": 0.6206847156567827, "grad_norm": 1.041752815246582, "learning_rate": 6.644551852582787e-05, "loss": 1.5704, "step": 4333 }, { "epoch": 0.6208279616100845, "grad_norm": 0.9749700427055359, "learning_rate": 6.6401814319801e-05, "loss": 1.0998, "step": 4334 }, { "epoch": 0.6209712075633863, "grad_norm": 1.117542028427124, "learning_rate": 6.635811734663202e-05, "loss": 1.4643, "step": 4335 }, { "epoch": 0.6211144535166881, "grad_norm": 1.2276312112808228, "learning_rate": 6.631442761572788e-05, "loss": 1.3601, "step": 4336 }, { "epoch": 0.62125769946999, "grad_norm": 1.165616512298584, "learning_rate": 6.627074513649392e-05, "loss": 1.3909, "step": 4337 }, { "epoch": 0.6214009454232918, "grad_norm": 1.1910256147384644, "learning_rate": 6.622706991833383e-05, "loss": 1.3367, "step": 4338 }, { "epoch": 0.6215441913765936, "grad_norm": 1.0784599781036377, "learning_rate": 6.618340197064983e-05, "loss": 1.3365, "step": 4339 }, { "epoch": 0.6216874373298954, "grad_norm": 1.2258021831512451, "learning_rate": 6.613974130284258e-05, "loss": 1.4701, "step": 4340 }, { "epoch": 0.6218306832831972, "grad_norm": 0.9904068112373352, "learning_rate": 6.60960879243111e-05, "loss": 1.5921, "step": 4341 }, { "epoch": 0.6219739292364991, "grad_norm": 1.0260205268859863, "learning_rate": 6.605244184445292e-05, "loss": 1.2602, "step": 4342 }, { "epoch": 0.6221171751898009, "grad_norm": 1.2487592697143555, "learning_rate": 6.600880307266393e-05, "loss": 1.4236, "step": 4343 }, { "epoch": 0.6222604211431028, "grad_norm": 0.9697585701942444, "learning_rate": 6.596517161833845e-05, "loss": 1.3647, "step": 4344 }, { "epoch": 0.6224036670964045, "grad_norm": 1.0318689346313477, "learning_rate": 6.592154749086934e-05, "loss": 1.4249, "step": 4345 }, { "epoch": 0.6225469130497063, "grad_norm": 1.2537623643875122, "learning_rate": 6.587793069964771e-05, "loss": 1.4691, "step": 4346 }, { "epoch": 0.6226901590030082, "grad_norm": 0.9639636874198914, "learning_rate": 6.583432125406323e-05, "loss": 1.2842, "step": 4347 }, { "epoch": 0.62283340495631, "grad_norm": 1.045020580291748, "learning_rate": 6.579071916350393e-05, "loss": 1.6159, "step": 4348 }, { "epoch": 0.6229766509096119, "grad_norm": 0.9704081416130066, "learning_rate": 6.57471244373562e-05, "loss": 1.3843, "step": 4349 }, { "epoch": 0.6231198968629136, "grad_norm": 0.9892087578773499, "learning_rate": 6.5703537085005e-05, "loss": 1.458, "step": 4350 }, { "epoch": 0.6232631428162154, "grad_norm": 1.1427958011627197, "learning_rate": 6.565995711583353e-05, "loss": 1.4811, "step": 4351 }, { "epoch": 0.6234063887695173, "grad_norm": 1.0081120729446411, "learning_rate": 6.561638453922349e-05, "loss": 1.4028, "step": 4352 }, { "epoch": 0.6235496347228191, "grad_norm": 0.9824202060699463, "learning_rate": 6.557281936455506e-05, "loss": 1.3992, "step": 4353 }, { "epoch": 0.6236928806761209, "grad_norm": 1.043860912322998, "learning_rate": 6.552926160120663e-05, "loss": 1.3606, "step": 4354 }, { "epoch": 0.6238361266294227, "grad_norm": 0.9067694544792175, "learning_rate": 6.548571125855519e-05, "loss": 1.5557, "step": 4355 }, { "epoch": 0.6239793725827245, "grad_norm": 1.0535426139831543, "learning_rate": 6.544216834597597e-05, "loss": 1.3799, "step": 4356 }, { "epoch": 0.6241226185360264, "grad_norm": 1.2056607007980347, "learning_rate": 6.539863287284275e-05, "loss": 1.4768, "step": 4357 }, { "epoch": 0.6242658644893282, "grad_norm": 0.9185512661933899, "learning_rate": 6.535510484852767e-05, "loss": 1.5403, "step": 4358 }, { "epoch": 0.62440911044263, "grad_norm": 0.9265509247779846, "learning_rate": 6.531158428240113e-05, "loss": 1.6739, "step": 4359 }, { "epoch": 0.6245523563959318, "grad_norm": 1.0269006490707397, "learning_rate": 6.52680711838321e-05, "loss": 1.3949, "step": 4360 }, { "epoch": 0.6246956023492336, "grad_norm": 1.1948872804641724, "learning_rate": 6.522456556218791e-05, "loss": 1.3824, "step": 4361 }, { "epoch": 0.6248388483025354, "grad_norm": 1.0690702199935913, "learning_rate": 6.518106742683415e-05, "loss": 1.4393, "step": 4362 }, { "epoch": 0.6249820942558373, "grad_norm": 1.0668222904205322, "learning_rate": 6.513757678713495e-05, "loss": 1.518, "step": 4363 }, { "epoch": 0.6251253402091391, "grad_norm": 1.1756402254104614, "learning_rate": 6.509409365245276e-05, "loss": 1.33, "step": 4364 }, { "epoch": 0.625268586162441, "grad_norm": 1.1705524921417236, "learning_rate": 6.50506180321484e-05, "loss": 1.2453, "step": 4365 }, { "epoch": 0.6254118321157427, "grad_norm": 1.2229130268096924, "learning_rate": 6.500714993558115e-05, "loss": 1.397, "step": 4366 }, { "epoch": 0.6255550780690445, "grad_norm": 0.9605799317359924, "learning_rate": 6.496368937210853e-05, "loss": 1.406, "step": 4367 }, { "epoch": 0.6256983240223464, "grad_norm": 0.9673969149589539, "learning_rate": 6.49202363510866e-05, "loss": 1.4262, "step": 4368 }, { "epoch": 0.6258415699756482, "grad_norm": 1.1713780164718628, "learning_rate": 6.487679088186973e-05, "loss": 1.3311, "step": 4369 }, { "epoch": 0.6259848159289501, "grad_norm": 1.0765293836593628, "learning_rate": 6.483335297381057e-05, "loss": 1.4639, "step": 4370 }, { "epoch": 0.6261280618822518, "grad_norm": 0.9536416530609131, "learning_rate": 6.478992263626031e-05, "loss": 1.4857, "step": 4371 }, { "epoch": 0.6262713078355536, "grad_norm": 1.0394986867904663, "learning_rate": 6.474649987856834e-05, "loss": 1.3282, "step": 4372 }, { "epoch": 0.6264145537888555, "grad_norm": 0.9428933262825012, "learning_rate": 6.47030847100826e-05, "loss": 1.5142, "step": 4373 }, { "epoch": 0.6265577997421573, "grad_norm": 1.1511598825454712, "learning_rate": 6.465967714014927e-05, "loss": 1.341, "step": 4374 }, { "epoch": 0.6267010456954591, "grad_norm": 0.9480801224708557, "learning_rate": 6.461627717811288e-05, "loss": 1.33, "step": 4375 }, { "epoch": 0.6268442916487609, "grad_norm": 1.1816121339797974, "learning_rate": 6.457288483331639e-05, "loss": 1.3985, "step": 4376 }, { "epoch": 0.6269875376020627, "grad_norm": 1.0404813289642334, "learning_rate": 6.452950011510118e-05, "loss": 1.5845, "step": 4377 }, { "epoch": 0.6271307835553646, "grad_norm": 0.9674251079559326, "learning_rate": 6.448612303280677e-05, "loss": 1.4529, "step": 4378 }, { "epoch": 0.6272740295086664, "grad_norm": 1.1875535249710083, "learning_rate": 6.444275359577128e-05, "loss": 1.4005, "step": 4379 }, { "epoch": 0.6274172754619682, "grad_norm": 0.9839823842048645, "learning_rate": 6.439939181333101e-05, "loss": 1.4954, "step": 4380 }, { "epoch": 0.62756052141527, "grad_norm": 1.040177822113037, "learning_rate": 6.435603769482071e-05, "loss": 1.0878, "step": 4381 }, { "epoch": 0.6277037673685718, "grad_norm": 0.8938812017440796, "learning_rate": 6.431269124957347e-05, "loss": 1.6431, "step": 4382 }, { "epoch": 0.6278470133218736, "grad_norm": 1.0711067914962769, "learning_rate": 6.426935248692064e-05, "loss": 1.2503, "step": 4383 }, { "epoch": 0.6279902592751755, "grad_norm": 1.0661498308181763, "learning_rate": 6.422602141619207e-05, "loss": 1.4776, "step": 4384 }, { "epoch": 0.6281335052284773, "grad_norm": 1.0704522132873535, "learning_rate": 6.418269804671576e-05, "loss": 1.5105, "step": 4385 }, { "epoch": 0.6282767511817792, "grad_norm": 1.1132880449295044, "learning_rate": 6.413938238781824e-05, "loss": 1.3982, "step": 4386 }, { "epoch": 0.6284199971350809, "grad_norm": 1.1062909364700317, "learning_rate": 6.409607444882431e-05, "loss": 1.4186, "step": 4387 }, { "epoch": 0.6285632430883827, "grad_norm": 1.0716989040374756, "learning_rate": 6.405277423905705e-05, "loss": 1.3269, "step": 4388 }, { "epoch": 0.6287064890416846, "grad_norm": 1.0510997772216797, "learning_rate": 6.40094817678379e-05, "loss": 1.5088, "step": 4389 }, { "epoch": 0.6288497349949864, "grad_norm": 1.1650594472885132, "learning_rate": 6.396619704448677e-05, "loss": 1.4777, "step": 4390 }, { "epoch": 0.6289929809482883, "grad_norm": 1.041411280632019, "learning_rate": 6.392292007832168e-05, "loss": 1.4776, "step": 4391 }, { "epoch": 0.62913622690159, "grad_norm": 0.9809937477111816, "learning_rate": 6.387965087865915e-05, "loss": 1.4002, "step": 4392 }, { "epoch": 0.6292794728548918, "grad_norm": 1.183027982711792, "learning_rate": 6.383638945481391e-05, "loss": 1.4181, "step": 4393 }, { "epoch": 0.6294227188081937, "grad_norm": 1.1031701564788818, "learning_rate": 6.379313581609912e-05, "loss": 1.5543, "step": 4394 }, { "epoch": 0.6295659647614955, "grad_norm": 1.1924076080322266, "learning_rate": 6.374988997182623e-05, "loss": 1.4058, "step": 4395 }, { "epoch": 0.6297092107147974, "grad_norm": 1.3014189004898071, "learning_rate": 6.370665193130495e-05, "loss": 1.4321, "step": 4396 }, { "epoch": 0.6298524566680991, "grad_norm": 1.1979700326919556, "learning_rate": 6.36634217038434e-05, "loss": 1.4608, "step": 4397 }, { "epoch": 0.6299957026214009, "grad_norm": 1.238476276397705, "learning_rate": 6.362019929874799e-05, "loss": 1.2799, "step": 4398 }, { "epoch": 0.6301389485747028, "grad_norm": 1.080533504486084, "learning_rate": 6.357698472532338e-05, "loss": 1.3547, "step": 4399 }, { "epoch": 0.6302821945280046, "grad_norm": 0.9394568800926208, "learning_rate": 6.353377799287266e-05, "loss": 1.5745, "step": 4400 }, { "epoch": 0.6304254404813064, "grad_norm": 0.9222342371940613, "learning_rate": 6.349057911069709e-05, "loss": 1.4971, "step": 4401 }, { "epoch": 0.6305686864346082, "grad_norm": 1.2611083984375, "learning_rate": 6.344738808809639e-05, "loss": 1.5, "step": 4402 }, { "epoch": 0.63071193238791, "grad_norm": 1.0464383363723755, "learning_rate": 6.340420493436851e-05, "loss": 1.1798, "step": 4403 }, { "epoch": 0.6308551783412119, "grad_norm": 1.0896241664886475, "learning_rate": 6.33610296588097e-05, "loss": 1.3448, "step": 4404 }, { "epoch": 0.6309984242945137, "grad_norm": 1.097690463066101, "learning_rate": 6.33178622707145e-05, "loss": 1.3587, "step": 4405 }, { "epoch": 0.6311416702478155, "grad_norm": 1.1287853717803955, "learning_rate": 6.327470277937586e-05, "loss": 1.4861, "step": 4406 }, { "epoch": 0.6312849162011173, "grad_norm": 1.175378680229187, "learning_rate": 6.323155119408489e-05, "loss": 1.568, "step": 4407 }, { "epoch": 0.6314281621544191, "grad_norm": 0.937500536441803, "learning_rate": 6.318840752413106e-05, "loss": 1.3875, "step": 4408 }, { "epoch": 0.6315714081077209, "grad_norm": 1.0182346105575562, "learning_rate": 6.314527177880215e-05, "loss": 1.3817, "step": 4409 }, { "epoch": 0.6317146540610228, "grad_norm": 1.0185761451721191, "learning_rate": 6.310214396738419e-05, "loss": 1.5255, "step": 4410 }, { "epoch": 0.6318579000143246, "grad_norm": 1.3752186298370361, "learning_rate": 6.30590240991616e-05, "loss": 1.5994, "step": 4411 }, { "epoch": 0.6320011459676265, "grad_norm": 1.1595021486282349, "learning_rate": 6.301591218341693e-05, "loss": 1.4939, "step": 4412 }, { "epoch": 0.6321443919209282, "grad_norm": 1.0067163705825806, "learning_rate": 6.297280822943118e-05, "loss": 1.3161, "step": 4413 }, { "epoch": 0.63228763787423, "grad_norm": 1.0694667100906372, "learning_rate": 6.292971224648352e-05, "loss": 1.5245, "step": 4414 }, { "epoch": 0.6324308838275319, "grad_norm": 1.0529897212982178, "learning_rate": 6.288662424385148e-05, "loss": 1.4631, "step": 4415 }, { "epoch": 0.6325741297808337, "grad_norm": 1.196317434310913, "learning_rate": 6.284354423081083e-05, "loss": 1.4685, "step": 4416 }, { "epoch": 0.6327173757341356, "grad_norm": 1.1030393838882446, "learning_rate": 6.280047221663558e-05, "loss": 1.3836, "step": 4417 }, { "epoch": 0.6328606216874373, "grad_norm": 1.1806074380874634, "learning_rate": 6.275740821059817e-05, "loss": 1.3812, "step": 4418 }, { "epoch": 0.6330038676407391, "grad_norm": 1.0087110996246338, "learning_rate": 6.271435222196916e-05, "loss": 1.4381, "step": 4419 }, { "epoch": 0.633147113594041, "grad_norm": 1.0950722694396973, "learning_rate": 6.267130426001742e-05, "loss": 1.4961, "step": 4420 }, { "epoch": 0.6332903595473428, "grad_norm": 1.0456870794296265, "learning_rate": 6.262826433401015e-05, "loss": 1.467, "step": 4421 }, { "epoch": 0.6334336055006446, "grad_norm": 1.065233588218689, "learning_rate": 6.258523245321274e-05, "loss": 1.3606, "step": 4422 }, { "epoch": 0.6335768514539464, "grad_norm": 0.9999430775642395, "learning_rate": 6.254220862688889e-05, "loss": 1.4896, "step": 4423 }, { "epoch": 0.6337200974072482, "grad_norm": 0.8572548627853394, "learning_rate": 6.249919286430063e-05, "loss": 1.3753, "step": 4424 }, { "epoch": 0.6338633433605501, "grad_norm": 1.059081792831421, "learning_rate": 6.245618517470813e-05, "loss": 1.3788, "step": 4425 }, { "epoch": 0.6340065893138519, "grad_norm": 1.0721956491470337, "learning_rate": 6.24131855673699e-05, "loss": 1.24, "step": 4426 }, { "epoch": 0.6341498352671537, "grad_norm": 1.187609314918518, "learning_rate": 6.23701940515427e-05, "loss": 1.3354, "step": 4427 }, { "epoch": 0.6342930812204555, "grad_norm": 1.0395785570144653, "learning_rate": 6.232721063648148e-05, "loss": 1.4325, "step": 4428 }, { "epoch": 0.6344363271737573, "grad_norm": 1.2745407819747925, "learning_rate": 6.228423533143963e-05, "loss": 1.4736, "step": 4429 }, { "epoch": 0.6345795731270591, "grad_norm": 1.2750049829483032, "learning_rate": 6.224126814566853e-05, "loss": 1.5035, "step": 4430 }, { "epoch": 0.634722819080361, "grad_norm": 1.1220617294311523, "learning_rate": 6.219830908841802e-05, "loss": 1.4499, "step": 4431 }, { "epoch": 0.6348660650336628, "grad_norm": 1.1072580814361572, "learning_rate": 6.215535816893615e-05, "loss": 1.3874, "step": 4432 }, { "epoch": 0.6350093109869647, "grad_norm": 0.966542661190033, "learning_rate": 6.211241539646913e-05, "loss": 1.386, "step": 4433 }, { "epoch": 0.6351525569402664, "grad_norm": 1.1650657653808594, "learning_rate": 6.206948078026154e-05, "loss": 1.4699, "step": 4434 }, { "epoch": 0.6352958028935682, "grad_norm": 0.9655526280403137, "learning_rate": 6.202655432955604e-05, "loss": 1.4294, "step": 4435 }, { "epoch": 0.6354390488468701, "grad_norm": 1.0863144397735596, "learning_rate": 6.198363605359373e-05, "loss": 1.3428, "step": 4436 }, { "epoch": 0.6355822948001719, "grad_norm": 1.1797558069229126, "learning_rate": 6.194072596161383e-05, "loss": 1.3891, "step": 4437 }, { "epoch": 0.6357255407534738, "grad_norm": 0.9600929617881775, "learning_rate": 6.18978240628538e-05, "loss": 1.3947, "step": 4438 }, { "epoch": 0.6358687867067755, "grad_norm": 1.1644914150238037, "learning_rate": 6.185493036654934e-05, "loss": 1.3252, "step": 4439 }, { "epoch": 0.6360120326600773, "grad_norm": 1.5045429468154907, "learning_rate": 6.181204488193446e-05, "loss": 1.5303, "step": 4440 }, { "epoch": 0.6361552786133792, "grad_norm": 0.9693203568458557, "learning_rate": 6.176916761824129e-05, "loss": 1.464, "step": 4441 }, { "epoch": 0.636298524566681, "grad_norm": 1.0141738653182983, "learning_rate": 6.172629858470031e-05, "loss": 1.5092, "step": 4442 }, { "epoch": 0.6364417705199829, "grad_norm": 1.377707600593567, "learning_rate": 6.168343779054009e-05, "loss": 1.4183, "step": 4443 }, { "epoch": 0.6365850164732846, "grad_norm": 1.1417499780654907, "learning_rate": 6.16405852449875e-05, "loss": 1.3681, "step": 4444 }, { "epoch": 0.6367282624265864, "grad_norm": 1.6364623308181763, "learning_rate": 6.159774095726771e-05, "loss": 1.4026, "step": 4445 }, { "epoch": 0.6368715083798883, "grad_norm": 1.557728886604309, "learning_rate": 6.155490493660394e-05, "loss": 1.4005, "step": 4446 }, { "epoch": 0.6370147543331901, "grad_norm": 1.1836497783660889, "learning_rate": 6.151207719221778e-05, "loss": 1.4201, "step": 4447 }, { "epoch": 0.6371580002864919, "grad_norm": 1.101131796836853, "learning_rate": 6.146925773332899e-05, "loss": 1.4411, "step": 4448 }, { "epoch": 0.6373012462397937, "grad_norm": 1.0415374040603638, "learning_rate": 6.14264465691555e-05, "loss": 1.5427, "step": 4449 }, { "epoch": 0.6374444921930955, "grad_norm": 1.0730514526367188, "learning_rate": 6.138364370891354e-05, "loss": 1.5127, "step": 4450 }, { "epoch": 0.6375877381463974, "grad_norm": 1.064699411392212, "learning_rate": 6.134084916181746e-05, "loss": 1.4927, "step": 4451 }, { "epoch": 0.6377309840996992, "grad_norm": 1.001696228981018, "learning_rate": 6.129806293707989e-05, "loss": 1.3577, "step": 4452 }, { "epoch": 0.637874230053001, "grad_norm": 0.982567548751831, "learning_rate": 6.125528504391167e-05, "loss": 1.4494, "step": 4453 }, { "epoch": 0.6380174760063029, "grad_norm": 1.213464379310608, "learning_rate": 6.121251549152178e-05, "loss": 1.3442, "step": 4454 }, { "epoch": 0.6381607219596046, "grad_norm": 1.1963921785354614, "learning_rate": 6.116975428911744e-05, "loss": 1.4634, "step": 4455 }, { "epoch": 0.6383039679129064, "grad_norm": 0.9238240122795105, "learning_rate": 6.112700144590416e-05, "loss": 1.5199, "step": 4456 }, { "epoch": 0.6384472138662083, "grad_norm": 1.1160914897918701, "learning_rate": 6.108425697108546e-05, "loss": 1.5136, "step": 4457 }, { "epoch": 0.6385904598195101, "grad_norm": 1.106880784034729, "learning_rate": 6.104152087386325e-05, "loss": 1.4349, "step": 4458 }, { "epoch": 0.638733705772812, "grad_norm": 1.2798396348953247, "learning_rate": 6.0998793163437505e-05, "loss": 1.4765, "step": 4459 }, { "epoch": 0.6388769517261137, "grad_norm": 0.9928169250488281, "learning_rate": 6.0956073849006456e-05, "loss": 1.4168, "step": 4460 }, { "epoch": 0.6390201976794155, "grad_norm": 1.0597409009933472, "learning_rate": 6.091336293976655e-05, "loss": 1.3773, "step": 4461 }, { "epoch": 0.6391634436327174, "grad_norm": 1.1796460151672363, "learning_rate": 6.087066044491232e-05, "loss": 1.3276, "step": 4462 }, { "epoch": 0.6393066895860192, "grad_norm": 1.371848225593567, "learning_rate": 6.0827966373636656e-05, "loss": 1.3714, "step": 4463 }, { "epoch": 0.6394499355393211, "grad_norm": 1.1110001802444458, "learning_rate": 6.078528073513041e-05, "loss": 1.5113, "step": 4464 }, { "epoch": 0.6395931814926228, "grad_norm": 1.0719579458236694, "learning_rate": 6.0742603538582835e-05, "loss": 1.4711, "step": 4465 }, { "epoch": 0.6397364274459246, "grad_norm": 1.136971354484558, "learning_rate": 6.069993479318126e-05, "loss": 1.3748, "step": 4466 }, { "epoch": 0.6398796733992265, "grad_norm": 1.0547744035720825, "learning_rate": 6.065727450811115e-05, "loss": 1.4879, "step": 4467 }, { "epoch": 0.6400229193525283, "grad_norm": 1.0067002773284912, "learning_rate": 6.061462269255629e-05, "loss": 1.4133, "step": 4468 }, { "epoch": 0.6401661653058301, "grad_norm": 1.071082353591919, "learning_rate": 6.057197935569854e-05, "loss": 1.5086, "step": 4469 }, { "epoch": 0.6403094112591319, "grad_norm": 1.0285546779632568, "learning_rate": 6.0529344506717935e-05, "loss": 1.4429, "step": 4470 }, { "epoch": 0.6404526572124337, "grad_norm": 1.017897129058838, "learning_rate": 6.0486718154792724e-05, "loss": 1.3756, "step": 4471 }, { "epoch": 0.6405959031657356, "grad_norm": 1.3890084028244019, "learning_rate": 6.044410030909926e-05, "loss": 1.4051, "step": 4472 }, { "epoch": 0.6407391491190374, "grad_norm": 0.9783182740211487, "learning_rate": 6.040149097881214e-05, "loss": 1.4816, "step": 4473 }, { "epoch": 0.6408823950723392, "grad_norm": 1.0839159488677979, "learning_rate": 6.035889017310414e-05, "loss": 1.5641, "step": 4474 }, { "epoch": 0.6410256410256411, "grad_norm": 1.076854944229126, "learning_rate": 6.0316297901146103e-05, "loss": 1.513, "step": 4475 }, { "epoch": 0.6411688869789428, "grad_norm": 1.2682785987854004, "learning_rate": 6.02737141721071e-05, "loss": 1.5, "step": 4476 }, { "epoch": 0.6413121329322446, "grad_norm": 1.1027729511260986, "learning_rate": 6.023113899515438e-05, "loss": 1.5581, "step": 4477 }, { "epoch": 0.6414553788855465, "grad_norm": 1.2295446395874023, "learning_rate": 6.0188572379453276e-05, "loss": 1.3184, "step": 4478 }, { "epoch": 0.6415986248388483, "grad_norm": 0.8571249842643738, "learning_rate": 6.014601433416741e-05, "loss": 1.4203, "step": 4479 }, { "epoch": 0.6417418707921502, "grad_norm": 1.0159034729003906, "learning_rate": 6.010346486845837e-05, "loss": 1.3116, "step": 4480 }, { "epoch": 0.6418851167454519, "grad_norm": 1.0799195766448975, "learning_rate": 6.0060923991486084e-05, "loss": 1.2659, "step": 4481 }, { "epoch": 0.6420283626987537, "grad_norm": 1.0707416534423828, "learning_rate": 6.001839171240853e-05, "loss": 1.3262, "step": 4482 }, { "epoch": 0.6421716086520556, "grad_norm": 1.0587345361709595, "learning_rate": 5.9975868040381844e-05, "loss": 1.4757, "step": 4483 }, { "epoch": 0.6423148546053574, "grad_norm": 1.024596095085144, "learning_rate": 5.9933352984560334e-05, "loss": 1.51, "step": 4484 }, { "epoch": 0.6424581005586593, "grad_norm": 0.9928714632987976, "learning_rate": 5.98908465540964e-05, "loss": 1.4354, "step": 4485 }, { "epoch": 0.642601346511961, "grad_norm": 1.0642262697219849, "learning_rate": 5.9848348758140674e-05, "loss": 1.4156, "step": 4486 }, { "epoch": 0.6427445924652628, "grad_norm": 0.9713102579116821, "learning_rate": 5.980585960584187e-05, "loss": 1.5137, "step": 4487 }, { "epoch": 0.6428878384185647, "grad_norm": 1.1548058986663818, "learning_rate": 5.976337910634684e-05, "loss": 1.4686, "step": 4488 }, { "epoch": 0.6430310843718665, "grad_norm": 1.0420300960540771, "learning_rate": 5.972090726880055e-05, "loss": 1.3066, "step": 4489 }, { "epoch": 0.6431743303251684, "grad_norm": 1.2319544553756714, "learning_rate": 5.967844410234624e-05, "loss": 1.3301, "step": 4490 }, { "epoch": 0.6433175762784701, "grad_norm": 1.076306939125061, "learning_rate": 5.963598961612506e-05, "loss": 1.5036, "step": 4491 }, { "epoch": 0.6434608222317719, "grad_norm": 1.0033247470855713, "learning_rate": 5.9593543819276486e-05, "loss": 1.3613, "step": 4492 }, { "epoch": 0.6436040681850738, "grad_norm": 0.965067982673645, "learning_rate": 5.955110672093801e-05, "loss": 1.399, "step": 4493 }, { "epoch": 0.6437473141383756, "grad_norm": 0.9814190864562988, "learning_rate": 5.950867833024529e-05, "loss": 1.464, "step": 4494 }, { "epoch": 0.6438905600916774, "grad_norm": 0.9549692273139954, "learning_rate": 5.946625865633216e-05, "loss": 1.4978, "step": 4495 }, { "epoch": 0.6440338060449793, "grad_norm": 1.1571122407913208, "learning_rate": 5.942384770833045e-05, "loss": 1.6527, "step": 4496 }, { "epoch": 0.644177051998281, "grad_norm": 1.2206703424453735, "learning_rate": 5.938144549537023e-05, "loss": 1.383, "step": 4497 }, { "epoch": 0.6443202979515829, "grad_norm": 1.016944169998169, "learning_rate": 5.9339052026579654e-05, "loss": 1.5799, "step": 4498 }, { "epoch": 0.6444635439048847, "grad_norm": 1.0843195915222168, "learning_rate": 5.929666731108497e-05, "loss": 1.3369, "step": 4499 }, { "epoch": 0.6446067898581865, "grad_norm": 0.9656053781509399, "learning_rate": 5.9254291358010586e-05, "loss": 1.5175, "step": 4500 }, { "epoch": 0.6447500358114884, "grad_norm": 1.0099884271621704, "learning_rate": 5.9211924176478915e-05, "loss": 1.4064, "step": 4501 }, { "epoch": 0.6448932817647901, "grad_norm": 0.9601247906684875, "learning_rate": 5.9169565775610656e-05, "loss": 1.3782, "step": 4502 }, { "epoch": 0.645036527718092, "grad_norm": 0.9413478970527649, "learning_rate": 5.9127216164524504e-05, "loss": 1.5851, "step": 4503 }, { "epoch": 0.6451797736713938, "grad_norm": 0.9864361882209778, "learning_rate": 5.908487535233725e-05, "loss": 1.3832, "step": 4504 }, { "epoch": 0.6453230196246956, "grad_norm": 1.1628612279891968, "learning_rate": 5.904254334816381e-05, "loss": 1.3238, "step": 4505 }, { "epoch": 0.6454662655779975, "grad_norm": 1.2943830490112305, "learning_rate": 5.900022016111733e-05, "loss": 1.5005, "step": 4506 }, { "epoch": 0.6456095115312992, "grad_norm": 1.0280368328094482, "learning_rate": 5.895790580030879e-05, "loss": 1.4286, "step": 4507 }, { "epoch": 0.645752757484601, "grad_norm": 1.1297389268875122, "learning_rate": 5.891560027484753e-05, "loss": 1.3872, "step": 4508 }, { "epoch": 0.6458960034379029, "grad_norm": 1.0245985984802246, "learning_rate": 5.8873303593840846e-05, "loss": 1.5299, "step": 4509 }, { "epoch": 0.6460392493912047, "grad_norm": 1.1804975271224976, "learning_rate": 5.883101576639415e-05, "loss": 1.3902, "step": 4510 }, { "epoch": 0.6461824953445066, "grad_norm": 0.9928570985794067, "learning_rate": 5.878873680161101e-05, "loss": 1.4612, "step": 4511 }, { "epoch": 0.6463257412978083, "grad_norm": 1.0377628803253174, "learning_rate": 5.8746466708592986e-05, "loss": 1.4976, "step": 4512 }, { "epoch": 0.6464689872511101, "grad_norm": 1.104115605354309, "learning_rate": 5.870420549643987e-05, "loss": 1.4478, "step": 4513 }, { "epoch": 0.646612233204412, "grad_norm": 1.1387014389038086, "learning_rate": 5.866195317424934e-05, "loss": 1.4754, "step": 4514 }, { "epoch": 0.6467554791577138, "grad_norm": 0.9884274005889893, "learning_rate": 5.8619709751117344e-05, "loss": 1.4509, "step": 4515 }, { "epoch": 0.6468987251110156, "grad_norm": 1.0251950025558472, "learning_rate": 5.8577475236137855e-05, "loss": 1.3572, "step": 4516 }, { "epoch": 0.6470419710643174, "grad_norm": 1.1747344732284546, "learning_rate": 5.853524963840289e-05, "loss": 1.4893, "step": 4517 }, { "epoch": 0.6471852170176192, "grad_norm": 1.1077133417129517, "learning_rate": 5.849303296700257e-05, "loss": 1.4538, "step": 4518 }, { "epoch": 0.6473284629709211, "grad_norm": 1.3253967761993408, "learning_rate": 5.845082523102514e-05, "loss": 1.3669, "step": 4519 }, { "epoch": 0.6474717089242229, "grad_norm": 0.9842604994773865, "learning_rate": 5.8408626439556845e-05, "loss": 1.4237, "step": 4520 }, { "epoch": 0.6476149548775247, "grad_norm": 1.1810357570648193, "learning_rate": 5.8366436601682084e-05, "loss": 1.3325, "step": 4521 }, { "epoch": 0.6477582008308266, "grad_norm": 1.1851726770401, "learning_rate": 5.832425572648317e-05, "loss": 1.3467, "step": 4522 }, { "epoch": 0.6479014467841283, "grad_norm": 1.0546855926513672, "learning_rate": 5.828208382304072e-05, "loss": 1.3371, "step": 4523 }, { "epoch": 0.6480446927374302, "grad_norm": 0.9989160895347595, "learning_rate": 5.823992090043333e-05, "loss": 1.3331, "step": 4524 }, { "epoch": 0.648187938690732, "grad_norm": 1.0630459785461426, "learning_rate": 5.819776696773751e-05, "loss": 1.4482, "step": 4525 }, { "epoch": 0.6483311846440338, "grad_norm": 1.0202382802963257, "learning_rate": 5.815562203402798e-05, "loss": 1.6199, "step": 4526 }, { "epoch": 0.6484744305973357, "grad_norm": 1.162812352180481, "learning_rate": 5.8113486108377615e-05, "loss": 1.2535, "step": 4527 }, { "epoch": 0.6486176765506374, "grad_norm": 1.006460428237915, "learning_rate": 5.8071359199857114e-05, "loss": 1.4937, "step": 4528 }, { "epoch": 0.6487609225039392, "grad_norm": 1.1499429941177368, "learning_rate": 5.802924131753542e-05, "loss": 1.6045, "step": 4529 }, { "epoch": 0.6489041684572411, "grad_norm": 1.06486177444458, "learning_rate": 5.798713247047944e-05, "loss": 1.4043, "step": 4530 }, { "epoch": 0.6490474144105429, "grad_norm": 1.292765736579895, "learning_rate": 5.79450326677542e-05, "loss": 1.5262, "step": 4531 }, { "epoch": 0.6491906603638448, "grad_norm": 1.0108109712600708, "learning_rate": 5.790294191842276e-05, "loss": 1.3823, "step": 4532 }, { "epoch": 0.6493339063171465, "grad_norm": 0.9800224304199219, "learning_rate": 5.786086023154609e-05, "loss": 1.4338, "step": 4533 }, { "epoch": 0.6494771522704483, "grad_norm": 0.8256798386573792, "learning_rate": 5.781878761618349e-05, "loss": 1.2739, "step": 4534 }, { "epoch": 0.6496203982237502, "grad_norm": 1.148642897605896, "learning_rate": 5.777672408139212e-05, "loss": 1.4234, "step": 4535 }, { "epoch": 0.649763644177052, "grad_norm": 1.1265605688095093, "learning_rate": 5.773466963622716e-05, "loss": 1.4703, "step": 4536 }, { "epoch": 0.6499068901303539, "grad_norm": 1.0741684436798096, "learning_rate": 5.7692624289741914e-05, "loss": 1.4628, "step": 4537 }, { "epoch": 0.6500501360836556, "grad_norm": 1.138808250427246, "learning_rate": 5.765058805098773e-05, "loss": 1.3964, "step": 4538 }, { "epoch": 0.6501933820369574, "grad_norm": 1.1394202709197998, "learning_rate": 5.7608560929013946e-05, "loss": 1.2188, "step": 4539 }, { "epoch": 0.6503366279902593, "grad_norm": 0.9526465535163879, "learning_rate": 5.756654293286796e-05, "loss": 1.4596, "step": 4540 }, { "epoch": 0.6504798739435611, "grad_norm": 1.0236263275146484, "learning_rate": 5.752453407159522e-05, "loss": 1.5331, "step": 4541 }, { "epoch": 0.650623119896863, "grad_norm": 1.0928754806518555, "learning_rate": 5.7482534354239225e-05, "loss": 1.4247, "step": 4542 }, { "epoch": 0.6507663658501648, "grad_norm": 1.0806907415390015, "learning_rate": 5.74405437898414e-05, "loss": 1.4387, "step": 4543 }, { "epoch": 0.6509096118034665, "grad_norm": 1.1278156042099, "learning_rate": 5.739856238744129e-05, "loss": 1.3394, "step": 4544 }, { "epoch": 0.6510528577567684, "grad_norm": 1.2492507696151733, "learning_rate": 5.735659015607655e-05, "loss": 1.3531, "step": 4545 }, { "epoch": 0.6511961037100702, "grad_norm": 1.0405765771865845, "learning_rate": 5.731462710478264e-05, "loss": 1.4454, "step": 4546 }, { "epoch": 0.651339349663372, "grad_norm": 1.0472185611724854, "learning_rate": 5.7272673242593174e-05, "loss": 1.4223, "step": 4547 }, { "epoch": 0.6514825956166739, "grad_norm": 1.1658077239990234, "learning_rate": 5.723072857853992e-05, "loss": 1.4263, "step": 4548 }, { "epoch": 0.6516258415699756, "grad_norm": 1.0355273485183716, "learning_rate": 5.7188793121652374e-05, "loss": 1.3195, "step": 4549 }, { "epoch": 0.6517690875232774, "grad_norm": 1.2060174942016602, "learning_rate": 5.714686688095825e-05, "loss": 1.3627, "step": 4550 }, { "epoch": 0.6519123334765793, "grad_norm": 1.2119323015213013, "learning_rate": 5.7104949865483246e-05, "loss": 1.3354, "step": 4551 }, { "epoch": 0.6520555794298811, "grad_norm": 0.9646314978599548, "learning_rate": 5.706304208425105e-05, "loss": 1.3961, "step": 4552 }, { "epoch": 0.652198825383183, "grad_norm": 1.051529049873352, "learning_rate": 5.702114354628341e-05, "loss": 1.4272, "step": 4553 }, { "epoch": 0.6523420713364847, "grad_norm": 0.9479196667671204, "learning_rate": 5.697925426059991e-05, "loss": 1.2748, "step": 4554 }, { "epoch": 0.6524853172897865, "grad_norm": 1.058199405670166, "learning_rate": 5.6937374236218424e-05, "loss": 1.3929, "step": 4555 }, { "epoch": 0.6526285632430884, "grad_norm": 1.1558740139007568, "learning_rate": 5.6895503482154666e-05, "loss": 1.2779, "step": 4556 }, { "epoch": 0.6527718091963902, "grad_norm": 1.0622003078460693, "learning_rate": 5.6853642007422294e-05, "loss": 1.3791, "step": 4557 }, { "epoch": 0.6529150551496921, "grad_norm": 1.126219391822815, "learning_rate": 5.681178982103309e-05, "loss": 1.4501, "step": 4558 }, { "epoch": 0.6530583011029938, "grad_norm": 1.1512702703475952, "learning_rate": 5.6769946931996795e-05, "loss": 1.3336, "step": 4559 }, { "epoch": 0.6532015470562956, "grad_norm": 1.2613657712936401, "learning_rate": 5.672811334932116e-05, "loss": 1.2297, "step": 4560 }, { "epoch": 0.6533447930095975, "grad_norm": 1.188358187675476, "learning_rate": 5.668628908201189e-05, "loss": 1.4524, "step": 4561 }, { "epoch": 0.6534880389628993, "grad_norm": 0.9419224858283997, "learning_rate": 5.6644474139072746e-05, "loss": 1.3315, "step": 4562 }, { "epoch": 0.6536312849162011, "grad_norm": 1.0754724740982056, "learning_rate": 5.660266852950547e-05, "loss": 1.4844, "step": 4563 }, { "epoch": 0.653774530869503, "grad_norm": 1.0817204713821411, "learning_rate": 5.6560872262309704e-05, "loss": 1.4058, "step": 4564 }, { "epoch": 0.6539177768228047, "grad_norm": 1.0191084146499634, "learning_rate": 5.651908534648315e-05, "loss": 1.523, "step": 4565 }, { "epoch": 0.6540610227761066, "grad_norm": 0.9848802089691162, "learning_rate": 5.647730779102161e-05, "loss": 1.2908, "step": 4566 }, { "epoch": 0.6542042687294084, "grad_norm": 1.0660760402679443, "learning_rate": 5.6435539604918654e-05, "loss": 1.4286, "step": 4567 }, { "epoch": 0.6543475146827102, "grad_norm": 1.0634881258010864, "learning_rate": 5.639378079716595e-05, "loss": 1.3289, "step": 4568 }, { "epoch": 0.6544907606360121, "grad_norm": 1.004914402961731, "learning_rate": 5.635203137675318e-05, "loss": 1.4854, "step": 4569 }, { "epoch": 0.6546340065893138, "grad_norm": 0.9726465344429016, "learning_rate": 5.631029135266791e-05, "loss": 1.4301, "step": 4570 }, { "epoch": 0.6547772525426157, "grad_norm": 1.1482840776443481, "learning_rate": 5.6268560733895816e-05, "loss": 1.3698, "step": 4571 }, { "epoch": 0.6549204984959175, "grad_norm": 1.2183526754379272, "learning_rate": 5.6226839529420314e-05, "loss": 1.4579, "step": 4572 }, { "epoch": 0.6550637444492193, "grad_norm": 1.2847543954849243, "learning_rate": 5.618512774822311e-05, "loss": 1.3118, "step": 4573 }, { "epoch": 0.6552069904025212, "grad_norm": 1.0733232498168945, "learning_rate": 5.6143425399283664e-05, "loss": 1.3983, "step": 4574 }, { "epoch": 0.6553502363558229, "grad_norm": 1.1844935417175293, "learning_rate": 5.610173249157942e-05, "loss": 1.435, "step": 4575 }, { "epoch": 0.6554934823091247, "grad_norm": 1.1102830171585083, "learning_rate": 5.6060049034085815e-05, "loss": 1.4206, "step": 4576 }, { "epoch": 0.6556367282624266, "grad_norm": 1.1408616304397583, "learning_rate": 5.6018375035776406e-05, "loss": 1.422, "step": 4577 }, { "epoch": 0.6557799742157284, "grad_norm": 1.0308042764663696, "learning_rate": 5.597671050562241e-05, "loss": 1.5397, "step": 4578 }, { "epoch": 0.6559232201690303, "grad_norm": 1.3452590703964233, "learning_rate": 5.5935055452593254e-05, "loss": 1.2149, "step": 4579 }, { "epoch": 0.656066466122332, "grad_norm": 1.1442689895629883, "learning_rate": 5.5893409885656214e-05, "loss": 1.4176, "step": 4580 }, { "epoch": 0.6562097120756338, "grad_norm": 1.1220859289169312, "learning_rate": 5.5851773813776556e-05, "loss": 1.5515, "step": 4581 }, { "epoch": 0.6563529580289357, "grad_norm": 1.0263195037841797, "learning_rate": 5.5810147245917535e-05, "loss": 1.4533, "step": 4582 }, { "epoch": 0.6564962039822375, "grad_norm": 1.2668074369430542, "learning_rate": 5.5768530191040206e-05, "loss": 1.3813, "step": 4583 }, { "epoch": 0.6566394499355394, "grad_norm": 1.0619176626205444, "learning_rate": 5.57269226581038e-05, "loss": 1.3198, "step": 4584 }, { "epoch": 0.6567826958888412, "grad_norm": 1.3197219371795654, "learning_rate": 5.56853246560654e-05, "loss": 1.4631, "step": 4585 }, { "epoch": 0.6569259418421429, "grad_norm": 1.0135531425476074, "learning_rate": 5.564373619387995e-05, "loss": 1.4171, "step": 4586 }, { "epoch": 0.6570691877954448, "grad_norm": 1.0145251750946045, "learning_rate": 5.5602157280500446e-05, "loss": 1.3297, "step": 4587 }, { "epoch": 0.6572124337487466, "grad_norm": 0.9064403772354126, "learning_rate": 5.556058792487779e-05, "loss": 1.4661, "step": 4588 }, { "epoch": 0.6573556797020484, "grad_norm": 1.383042812347412, "learning_rate": 5.551902813596087e-05, "loss": 1.268, "step": 4589 }, { "epoch": 0.6574989256553503, "grad_norm": 0.9954293370246887, "learning_rate": 5.5477477922696465e-05, "loss": 1.4083, "step": 4590 }, { "epoch": 0.657642171608652, "grad_norm": 1.0450252294540405, "learning_rate": 5.543593729402927e-05, "loss": 1.4777, "step": 4591 }, { "epoch": 0.6577854175619539, "grad_norm": 0.8920850157737732, "learning_rate": 5.5394406258902054e-05, "loss": 1.4769, "step": 4592 }, { "epoch": 0.6579286635152557, "grad_norm": 1.0448344945907593, "learning_rate": 5.5352884826255317e-05, "loss": 1.423, "step": 4593 }, { "epoch": 0.6580719094685575, "grad_norm": 1.1138440370559692, "learning_rate": 5.531137300502758e-05, "loss": 1.5368, "step": 4594 }, { "epoch": 0.6582151554218594, "grad_norm": 1.153704047203064, "learning_rate": 5.526987080415543e-05, "loss": 1.4418, "step": 4595 }, { "epoch": 0.6583584013751611, "grad_norm": 1.075050711631775, "learning_rate": 5.522837823257317e-05, "loss": 1.5503, "step": 4596 }, { "epoch": 0.658501647328463, "grad_norm": 1.0047483444213867, "learning_rate": 5.518689529921315e-05, "loss": 1.5202, "step": 4597 }, { "epoch": 0.6586448932817648, "grad_norm": 1.3084261417388916, "learning_rate": 5.514542201300563e-05, "loss": 1.4243, "step": 4598 }, { "epoch": 0.6587881392350666, "grad_norm": 1.054765224456787, "learning_rate": 5.5103958382878765e-05, "loss": 1.4511, "step": 4599 }, { "epoch": 0.6589313851883685, "grad_norm": 1.2251580953598022, "learning_rate": 5.5062504417758684e-05, "loss": 1.5065, "step": 4600 }, { "epoch": 0.6590746311416702, "grad_norm": 1.1300359964370728, "learning_rate": 5.502106012656931e-05, "loss": 1.3949, "step": 4601 }, { "epoch": 0.659217877094972, "grad_norm": 1.0543614625930786, "learning_rate": 5.497962551823266e-05, "loss": 1.5467, "step": 4602 }, { "epoch": 0.6593611230482739, "grad_norm": 1.142508625984192, "learning_rate": 5.493820060166861e-05, "loss": 1.3815, "step": 4603 }, { "epoch": 0.6595043690015757, "grad_norm": 1.1749300956726074, "learning_rate": 5.4896785385794815e-05, "loss": 1.2552, "step": 4604 }, { "epoch": 0.6596476149548776, "grad_norm": 1.2161883115768433, "learning_rate": 5.485537987952696e-05, "loss": 1.5366, "step": 4605 }, { "epoch": 0.6597908609081793, "grad_norm": 1.0623385906219482, "learning_rate": 5.4813984091778734e-05, "loss": 1.3996, "step": 4606 }, { "epoch": 0.6599341068614811, "grad_norm": 1.2170624732971191, "learning_rate": 5.4772598031461507e-05, "loss": 1.3891, "step": 4607 }, { "epoch": 0.660077352814783, "grad_norm": 0.9561026692390442, "learning_rate": 5.473122170748472e-05, "loss": 1.4362, "step": 4608 }, { "epoch": 0.6602205987680848, "grad_norm": 1.2127102613449097, "learning_rate": 5.4689855128755686e-05, "loss": 1.4595, "step": 4609 }, { "epoch": 0.6603638447213867, "grad_norm": 1.1160438060760498, "learning_rate": 5.4648498304179585e-05, "loss": 1.2667, "step": 4610 }, { "epoch": 0.6605070906746885, "grad_norm": 0.9959889054298401, "learning_rate": 5.4607151242659524e-05, "loss": 1.354, "step": 4611 }, { "epoch": 0.6606503366279902, "grad_norm": 1.1645419597625732, "learning_rate": 5.45658139530965e-05, "loss": 1.5773, "step": 4612 }, { "epoch": 0.6607935825812921, "grad_norm": 1.195703148841858, "learning_rate": 5.452448644438946e-05, "loss": 1.5745, "step": 4613 }, { "epoch": 0.6609368285345939, "grad_norm": 1.0137521028518677, "learning_rate": 5.4483168725435086e-05, "loss": 1.3337, "step": 4614 }, { "epoch": 0.6610800744878957, "grad_norm": 1.0424811840057373, "learning_rate": 5.444186080512809e-05, "loss": 1.3422, "step": 4615 }, { "epoch": 0.6612233204411976, "grad_norm": 0.9815017580986023, "learning_rate": 5.4400562692361145e-05, "loss": 1.3115, "step": 4616 }, { "epoch": 0.6613665663944993, "grad_norm": 1.0448380708694458, "learning_rate": 5.435927439602462e-05, "loss": 1.5276, "step": 4617 }, { "epoch": 0.6615098123478012, "grad_norm": 1.0571798086166382, "learning_rate": 5.431799592500686e-05, "loss": 1.3613, "step": 4618 }, { "epoch": 0.661653058301103, "grad_norm": 1.1727370023727417, "learning_rate": 5.427672728819414e-05, "loss": 1.4281, "step": 4619 }, { "epoch": 0.6617963042544048, "grad_norm": 1.043731451034546, "learning_rate": 5.423546849447055e-05, "loss": 1.4378, "step": 4620 }, { "epoch": 0.6619395502077067, "grad_norm": 0.9515613913536072, "learning_rate": 5.419421955271815e-05, "loss": 1.4298, "step": 4621 }, { "epoch": 0.6620827961610084, "grad_norm": 1.2299749851226807, "learning_rate": 5.415298047181667e-05, "loss": 1.6592, "step": 4622 }, { "epoch": 0.6622260421143102, "grad_norm": 1.2164242267608643, "learning_rate": 5.4111751260644e-05, "loss": 1.4575, "step": 4623 }, { "epoch": 0.6623692880676121, "grad_norm": 0.9539395570755005, "learning_rate": 5.407053192807576e-05, "loss": 1.258, "step": 4624 }, { "epoch": 0.6625125340209139, "grad_norm": 1.069482445716858, "learning_rate": 5.40293224829854e-05, "loss": 1.3364, "step": 4625 }, { "epoch": 0.6626557799742158, "grad_norm": 1.0834589004516602, "learning_rate": 5.398812293424426e-05, "loss": 1.3881, "step": 4626 }, { "epoch": 0.6627990259275175, "grad_norm": 0.9015188217163086, "learning_rate": 5.394693329072171e-05, "loss": 1.3456, "step": 4627 }, { "epoch": 0.6629422718808193, "grad_norm": 1.1651769876480103, "learning_rate": 5.390575356128474e-05, "loss": 1.422, "step": 4628 }, { "epoch": 0.6630855178341212, "grad_norm": 1.197929859161377, "learning_rate": 5.386458375479839e-05, "loss": 1.3686, "step": 4629 }, { "epoch": 0.663228763787423, "grad_norm": 1.0539158582687378, "learning_rate": 5.382342388012547e-05, "loss": 1.6378, "step": 4630 }, { "epoch": 0.6633720097407249, "grad_norm": 0.9805400371551514, "learning_rate": 5.37822739461267e-05, "loss": 1.3745, "step": 4631 }, { "epoch": 0.6635152556940267, "grad_norm": 1.1135749816894531, "learning_rate": 5.3741133961660686e-05, "loss": 1.3122, "step": 4632 }, { "epoch": 0.6636585016473284, "grad_norm": 1.2728681564331055, "learning_rate": 5.370000393558371e-05, "loss": 1.6586, "step": 4633 }, { "epoch": 0.6638017476006303, "grad_norm": 1.1571128368377686, "learning_rate": 5.365888387675018e-05, "loss": 1.3125, "step": 4634 }, { "epoch": 0.6639449935539321, "grad_norm": 1.2323644161224365, "learning_rate": 5.361777379401223e-05, "loss": 1.4344, "step": 4635 }, { "epoch": 0.664088239507234, "grad_norm": 0.979188859462738, "learning_rate": 5.357667369621977e-05, "loss": 1.482, "step": 4636 }, { "epoch": 0.6642314854605358, "grad_norm": 0.9748269319534302, "learning_rate": 5.353558359222065e-05, "loss": 1.415, "step": 4637 }, { "epoch": 0.6643747314138375, "grad_norm": 1.4010051488876343, "learning_rate": 5.349450349086057e-05, "loss": 1.4324, "step": 4638 }, { "epoch": 0.6645179773671394, "grad_norm": 1.0288825035095215, "learning_rate": 5.3453433400983055e-05, "loss": 1.409, "step": 4639 }, { "epoch": 0.6646612233204412, "grad_norm": 0.9494627714157104, "learning_rate": 5.3412373331429474e-05, "loss": 1.4856, "step": 4640 }, { "epoch": 0.664804469273743, "grad_norm": 1.1552248001098633, "learning_rate": 5.337132329103907e-05, "loss": 1.4849, "step": 4641 }, { "epoch": 0.6649477152270449, "grad_norm": 1.1238632202148438, "learning_rate": 5.3330283288648906e-05, "loss": 1.5129, "step": 4642 }, { "epoch": 0.6650909611803466, "grad_norm": 1.0481737852096558, "learning_rate": 5.3289253333093826e-05, "loss": 1.5223, "step": 4643 }, { "epoch": 0.6652342071336484, "grad_norm": 1.0552860498428345, "learning_rate": 5.324823343320654e-05, "loss": 1.4586, "step": 4644 }, { "epoch": 0.6653774530869503, "grad_norm": 1.1617873907089233, "learning_rate": 5.320722359781776e-05, "loss": 1.3824, "step": 4645 }, { "epoch": 0.6655206990402521, "grad_norm": 0.9579392075538635, "learning_rate": 5.3166223835755736e-05, "loss": 1.5059, "step": 4646 }, { "epoch": 0.665663944993554, "grad_norm": 1.015425682067871, "learning_rate": 5.312523415584678e-05, "loss": 1.4629, "step": 4647 }, { "epoch": 0.6658071909468557, "grad_norm": 1.1427415609359741, "learning_rate": 5.308425456691495e-05, "loss": 1.3648, "step": 4648 }, { "epoch": 0.6659504369001575, "grad_norm": 0.9164599776268005, "learning_rate": 5.3043285077782114e-05, "loss": 1.3849, "step": 4649 }, { "epoch": 0.6660936828534594, "grad_norm": 1.0506738424301147, "learning_rate": 5.300232569726804e-05, "loss": 1.4166, "step": 4650 }, { "epoch": 0.6662369288067612, "grad_norm": 1.1794301271438599, "learning_rate": 5.2961376434190144e-05, "loss": 1.2502, "step": 4651 }, { "epoch": 0.6663801747600631, "grad_norm": 0.9880931377410889, "learning_rate": 5.292043729736394e-05, "loss": 1.4938, "step": 4652 }, { "epoch": 0.6665234207133649, "grad_norm": 1.02407705783844, "learning_rate": 5.2879508295602575e-05, "loss": 1.4923, "step": 4653 }, { "epoch": 0.6666666666666666, "grad_norm": 1.1631250381469727, "learning_rate": 5.283858943771698e-05, "loss": 1.4391, "step": 4654 }, { "epoch": 0.6668099126199685, "grad_norm": 1.2295377254486084, "learning_rate": 5.2797680732515986e-05, "loss": 1.5335, "step": 4655 }, { "epoch": 0.6669531585732703, "grad_norm": 0.9972642660140991, "learning_rate": 5.275678218880632e-05, "loss": 1.4697, "step": 4656 }, { "epoch": 0.6670964045265722, "grad_norm": 1.0502122640609741, "learning_rate": 5.2715893815392325e-05, "loss": 1.3583, "step": 4657 }, { "epoch": 0.667239650479874, "grad_norm": 1.0837233066558838, "learning_rate": 5.26750156210763e-05, "loss": 1.4316, "step": 4658 }, { "epoch": 0.6673828964331757, "grad_norm": 1.1484932899475098, "learning_rate": 5.2634147614658294e-05, "loss": 1.4295, "step": 4659 }, { "epoch": 0.6675261423864776, "grad_norm": 1.1339203119277954, "learning_rate": 5.259328980493618e-05, "loss": 1.189, "step": 4660 }, { "epoch": 0.6676693883397794, "grad_norm": 0.9505112767219543, "learning_rate": 5.255244220070567e-05, "loss": 1.4944, "step": 4661 }, { "epoch": 0.6678126342930812, "grad_norm": 1.0537834167480469, "learning_rate": 5.251160481076016e-05, "loss": 1.2769, "step": 4662 }, { "epoch": 0.6679558802463831, "grad_norm": 1.139582633972168, "learning_rate": 5.247077764389099e-05, "loss": 1.4404, "step": 4663 }, { "epoch": 0.6680991261996848, "grad_norm": 1.0137072801589966, "learning_rate": 5.242996070888728e-05, "loss": 1.3511, "step": 4664 }, { "epoch": 0.6682423721529867, "grad_norm": 1.0986104011535645, "learning_rate": 5.2389154014535814e-05, "loss": 1.3368, "step": 4665 }, { "epoch": 0.6683856181062885, "grad_norm": 1.0190446376800537, "learning_rate": 5.23483575696213e-05, "loss": 1.4032, "step": 4666 }, { "epoch": 0.6685288640595903, "grad_norm": 1.0660560131072998, "learning_rate": 5.2307571382926215e-05, "loss": 1.3215, "step": 4667 }, { "epoch": 0.6686721100128922, "grad_norm": 1.0610193014144897, "learning_rate": 5.226679546323079e-05, "loss": 1.3175, "step": 4668 }, { "epoch": 0.6688153559661939, "grad_norm": 1.042181372642517, "learning_rate": 5.222602981931309e-05, "loss": 1.3867, "step": 4669 }, { "epoch": 0.6689586019194957, "grad_norm": 0.9854345917701721, "learning_rate": 5.2185274459948965e-05, "loss": 1.4576, "step": 4670 }, { "epoch": 0.6691018478727976, "grad_norm": 0.9518062472343445, "learning_rate": 5.2144529393912036e-05, "loss": 1.4949, "step": 4671 }, { "epoch": 0.6692450938260994, "grad_norm": 0.9983185529708862, "learning_rate": 5.210379462997364e-05, "loss": 1.6027, "step": 4672 }, { "epoch": 0.6693883397794013, "grad_norm": 1.495338797569275, "learning_rate": 5.206307017690302e-05, "loss": 1.5165, "step": 4673 }, { "epoch": 0.6695315857327031, "grad_norm": 0.9914500713348389, "learning_rate": 5.202235604346719e-05, "loss": 1.3573, "step": 4674 }, { "epoch": 0.6696748316860048, "grad_norm": 1.0878039598464966, "learning_rate": 5.19816522384308e-05, "loss": 1.4245, "step": 4675 }, { "epoch": 0.6698180776393067, "grad_norm": 1.0673552751541138, "learning_rate": 5.1940958770556383e-05, "loss": 1.3157, "step": 4676 }, { "epoch": 0.6699613235926085, "grad_norm": 1.0680954456329346, "learning_rate": 5.190027564860433e-05, "loss": 1.4977, "step": 4677 }, { "epoch": 0.6701045695459104, "grad_norm": 0.9051030874252319, "learning_rate": 5.185960288133261e-05, "loss": 1.355, "step": 4678 }, { "epoch": 0.6702478154992122, "grad_norm": 1.0885728597640991, "learning_rate": 5.181894047749711e-05, "loss": 1.5478, "step": 4679 }, { "epoch": 0.6703910614525139, "grad_norm": 1.2315906286239624, "learning_rate": 5.177828844585142e-05, "loss": 1.2399, "step": 4680 }, { "epoch": 0.6705343074058158, "grad_norm": 1.2804405689239502, "learning_rate": 5.173764679514693e-05, "loss": 1.2929, "step": 4681 }, { "epoch": 0.6706775533591176, "grad_norm": 1.2457267045974731, "learning_rate": 5.169701553413282e-05, "loss": 1.4978, "step": 4682 }, { "epoch": 0.6708207993124194, "grad_norm": 1.1037180423736572, "learning_rate": 5.1656394671555875e-05, "loss": 1.3802, "step": 4683 }, { "epoch": 0.6709640452657213, "grad_norm": 1.25758695602417, "learning_rate": 5.161578421616087e-05, "loss": 1.5493, "step": 4684 }, { "epoch": 0.671107291219023, "grad_norm": 0.997965931892395, "learning_rate": 5.157518417669023e-05, "loss": 1.1578, "step": 4685 }, { "epoch": 0.6712505371723249, "grad_norm": 1.0033360719680786, "learning_rate": 5.1534594561884086e-05, "loss": 1.3032, "step": 4686 }, { "epoch": 0.6713937831256267, "grad_norm": 1.1388763189315796, "learning_rate": 5.1494015380480396e-05, "loss": 1.1793, "step": 4687 }, { "epoch": 0.6715370290789285, "grad_norm": 1.1157748699188232, "learning_rate": 5.1453446641214854e-05, "loss": 1.2793, "step": 4688 }, { "epoch": 0.6716802750322304, "grad_norm": 1.1203961372375488, "learning_rate": 5.141288835282092e-05, "loss": 1.4593, "step": 4689 }, { "epoch": 0.6718235209855321, "grad_norm": 1.197005271911621, "learning_rate": 5.137234052402977e-05, "loss": 1.4407, "step": 4690 }, { "epoch": 0.671966766938834, "grad_norm": 1.1796092987060547, "learning_rate": 5.133180316357036e-05, "loss": 1.308, "step": 4691 }, { "epoch": 0.6721100128921358, "grad_norm": 1.0350306034088135, "learning_rate": 5.129127628016941e-05, "loss": 1.2719, "step": 4692 }, { "epoch": 0.6722532588454376, "grad_norm": 1.1337361335754395, "learning_rate": 5.1250759882551284e-05, "loss": 1.3773, "step": 4693 }, { "epoch": 0.6723965047987395, "grad_norm": 1.2108922004699707, "learning_rate": 5.1210253979438174e-05, "loss": 1.427, "step": 4694 }, { "epoch": 0.6725397507520413, "grad_norm": 1.15889310836792, "learning_rate": 5.1169758579550084e-05, "loss": 1.2235, "step": 4695 }, { "epoch": 0.672682996705343, "grad_norm": 0.9420252442359924, "learning_rate": 5.112927369160458e-05, "loss": 1.5244, "step": 4696 }, { "epoch": 0.6728262426586449, "grad_norm": 0.9551871418952942, "learning_rate": 5.108879932431709e-05, "loss": 1.5211, "step": 4697 }, { "epoch": 0.6729694886119467, "grad_norm": 1.2036726474761963, "learning_rate": 5.104833548640074e-05, "loss": 1.2473, "step": 4698 }, { "epoch": 0.6731127345652486, "grad_norm": 1.0814273357391357, "learning_rate": 5.10078821865664e-05, "loss": 1.2721, "step": 4699 }, { "epoch": 0.6732559805185504, "grad_norm": 0.9679111838340759, "learning_rate": 5.096743943352269e-05, "loss": 1.4724, "step": 4700 }, { "epoch": 0.6733992264718521, "grad_norm": 1.0734469890594482, "learning_rate": 5.092700723597583e-05, "loss": 1.4583, "step": 4701 }, { "epoch": 0.673542472425154, "grad_norm": 1.2774831056594849, "learning_rate": 5.088658560262998e-05, "loss": 1.4771, "step": 4702 }, { "epoch": 0.6736857183784558, "grad_norm": 1.0236616134643555, "learning_rate": 5.084617454218692e-05, "loss": 1.3778, "step": 4703 }, { "epoch": 0.6738289643317577, "grad_norm": 1.3890806436538696, "learning_rate": 5.080577406334607e-05, "loss": 1.424, "step": 4704 }, { "epoch": 0.6739722102850595, "grad_norm": 1.048463225364685, "learning_rate": 5.076538417480465e-05, "loss": 1.6065, "step": 4705 }, { "epoch": 0.6741154562383612, "grad_norm": 1.2354425191879272, "learning_rate": 5.072500488525775e-05, "loss": 1.3801, "step": 4706 }, { "epoch": 0.6742587021916631, "grad_norm": 1.0977635383605957, "learning_rate": 5.068463620339787e-05, "loss": 1.3023, "step": 4707 }, { "epoch": 0.6744019481449649, "grad_norm": 0.9004481434822083, "learning_rate": 5.064427813791547e-05, "loss": 1.4373, "step": 4708 }, { "epoch": 0.6745451940982667, "grad_norm": 1.1374307870864868, "learning_rate": 5.0603930697498605e-05, "loss": 1.6981, "step": 4709 }, { "epoch": 0.6746884400515686, "grad_norm": 1.0072087049484253, "learning_rate": 5.0563593890833115e-05, "loss": 1.3042, "step": 4710 }, { "epoch": 0.6748316860048703, "grad_norm": 1.370752215385437, "learning_rate": 5.052326772660252e-05, "loss": 1.4036, "step": 4711 }, { "epoch": 0.6749749319581722, "grad_norm": 1.3025410175323486, "learning_rate": 5.048295221348795e-05, "loss": 1.4296, "step": 4712 }, { "epoch": 0.675118177911474, "grad_norm": 1.000956416130066, "learning_rate": 5.044264736016846e-05, "loss": 1.4186, "step": 4713 }, { "epoch": 0.6752614238647758, "grad_norm": 1.4790866374969482, "learning_rate": 5.040235317532067e-05, "loss": 1.4009, "step": 4714 }, { "epoch": 0.6754046698180777, "grad_norm": 1.0980147123336792, "learning_rate": 5.036206966761887e-05, "loss": 1.3314, "step": 4715 }, { "epoch": 0.6755479157713794, "grad_norm": 1.0527515411376953, "learning_rate": 5.03217968457351e-05, "loss": 1.3224, "step": 4716 }, { "epoch": 0.6756911617246812, "grad_norm": 1.2290985584259033, "learning_rate": 5.028153471833914e-05, "loss": 1.3974, "step": 4717 }, { "epoch": 0.6758344076779831, "grad_norm": 1.0816236734390259, "learning_rate": 5.024128329409841e-05, "loss": 1.4561, "step": 4718 }, { "epoch": 0.6759776536312849, "grad_norm": 1.0314141511917114, "learning_rate": 5.0201042581678034e-05, "loss": 1.5268, "step": 4719 }, { "epoch": 0.6761208995845868, "grad_norm": 1.0151269435882568, "learning_rate": 5.016081258974088e-05, "loss": 1.2441, "step": 4720 }, { "epoch": 0.6762641455378886, "grad_norm": 1.0514795780181885, "learning_rate": 5.012059332694747e-05, "loss": 1.3433, "step": 4721 }, { "epoch": 0.6764073914911903, "grad_norm": 1.162483811378479, "learning_rate": 5.008038480195596e-05, "loss": 1.5176, "step": 4722 }, { "epoch": 0.6765506374444922, "grad_norm": 0.9874649047851562, "learning_rate": 5.0040187023422237e-05, "loss": 1.4608, "step": 4723 }, { "epoch": 0.676693883397794, "grad_norm": 1.122450828552246, "learning_rate": 5.000000000000002e-05, "loss": 1.2824, "step": 4724 }, { "epoch": 0.6768371293510959, "grad_norm": 1.218967080116272, "learning_rate": 4.995982374034046e-05, "loss": 1.4422, "step": 4725 }, { "epoch": 0.6769803753043977, "grad_norm": 1.1774418354034424, "learning_rate": 4.991965825309254e-05, "loss": 1.4671, "step": 4726 }, { "epoch": 0.6771236212576994, "grad_norm": 1.1773892641067505, "learning_rate": 4.9879503546902906e-05, "loss": 1.4382, "step": 4727 }, { "epoch": 0.6772668672110013, "grad_norm": 1.0624511241912842, "learning_rate": 4.983935963041586e-05, "loss": 1.398, "step": 4728 }, { "epoch": 0.6774101131643031, "grad_norm": 1.0673298835754395, "learning_rate": 4.979922651227346e-05, "loss": 1.2745, "step": 4729 }, { "epoch": 0.677553359117605, "grad_norm": 1.0545941591262817, "learning_rate": 4.9759104201115214e-05, "loss": 1.5376, "step": 4730 }, { "epoch": 0.6776966050709068, "grad_norm": 1.152147889137268, "learning_rate": 4.9718992705578615e-05, "loss": 1.401, "step": 4731 }, { "epoch": 0.6778398510242085, "grad_norm": 1.0504961013793945, "learning_rate": 4.967889203429866e-05, "loss": 1.2607, "step": 4732 }, { "epoch": 0.6779830969775104, "grad_norm": 1.0604902505874634, "learning_rate": 4.9638802195907976e-05, "loss": 1.3426, "step": 4733 }, { "epoch": 0.6781263429308122, "grad_norm": 0.9292310476303101, "learning_rate": 4.9598723199036865e-05, "loss": 1.3828, "step": 4734 }, { "epoch": 0.678269588884114, "grad_norm": 1.0685092210769653, "learning_rate": 4.955865505231351e-05, "loss": 1.397, "step": 4735 }, { "epoch": 0.6784128348374159, "grad_norm": 1.057086706161499, "learning_rate": 4.951859776436345e-05, "loss": 1.5747, "step": 4736 }, { "epoch": 0.6785560807907176, "grad_norm": 1.0746482610702515, "learning_rate": 4.947855134381007e-05, "loss": 1.4448, "step": 4737 }, { "epoch": 0.6786993267440194, "grad_norm": 0.9867801666259766, "learning_rate": 4.943851579927436e-05, "loss": 1.4783, "step": 4738 }, { "epoch": 0.6788425726973213, "grad_norm": 1.0670701265335083, "learning_rate": 4.9398491139375016e-05, "loss": 1.3113, "step": 4739 }, { "epoch": 0.6789858186506231, "grad_norm": 1.1904017925262451, "learning_rate": 4.9358477372728326e-05, "loss": 1.51, "step": 4740 }, { "epoch": 0.679129064603925, "grad_norm": 1.051641821861267, "learning_rate": 4.931847450794828e-05, "loss": 1.4968, "step": 4741 }, { "epoch": 0.6792723105572268, "grad_norm": 1.1551966667175293, "learning_rate": 4.9278482553646535e-05, "loss": 1.3866, "step": 4742 }, { "epoch": 0.6794155565105285, "grad_norm": 1.1434199810028076, "learning_rate": 4.923850151843229e-05, "loss": 1.5198, "step": 4743 }, { "epoch": 0.6795588024638304, "grad_norm": 0.9995795488357544, "learning_rate": 4.9198531410912486e-05, "loss": 1.4389, "step": 4744 }, { "epoch": 0.6797020484171322, "grad_norm": 1.159527063369751, "learning_rate": 4.91585722396918e-05, "loss": 1.6115, "step": 4745 }, { "epoch": 0.6798452943704341, "grad_norm": 0.994971513748169, "learning_rate": 4.911862401337234e-05, "loss": 1.3416, "step": 4746 }, { "epoch": 0.6799885403237359, "grad_norm": 1.267449975013733, "learning_rate": 4.907868674055401e-05, "loss": 1.5255, "step": 4747 }, { "epoch": 0.6801317862770376, "grad_norm": 1.2041118144989014, "learning_rate": 4.903876042983433e-05, "loss": 1.3445, "step": 4748 }, { "epoch": 0.6802750322303395, "grad_norm": 1.215191125869751, "learning_rate": 4.899884508980843e-05, "loss": 1.1965, "step": 4749 }, { "epoch": 0.6804182781836413, "grad_norm": 1.2949573993682861, "learning_rate": 4.8958940729069134e-05, "loss": 1.3193, "step": 4750 }, { "epoch": 0.6805615241369432, "grad_norm": 1.178986668586731, "learning_rate": 4.891904735620675e-05, "loss": 1.501, "step": 4751 }, { "epoch": 0.680704770090245, "grad_norm": 1.1801514625549316, "learning_rate": 4.8879164979809454e-05, "loss": 1.4221, "step": 4752 }, { "epoch": 0.6808480160435467, "grad_norm": 1.0119770765304565, "learning_rate": 4.883929360846293e-05, "loss": 1.4934, "step": 4753 }, { "epoch": 0.6809912619968486, "grad_norm": 1.076130986213684, "learning_rate": 4.8799433250750425e-05, "loss": 1.5046, "step": 4754 }, { "epoch": 0.6811345079501504, "grad_norm": 0.8642748594284058, "learning_rate": 4.875958391525289e-05, "loss": 1.5181, "step": 4755 }, { "epoch": 0.6812777539034522, "grad_norm": 1.0306544303894043, "learning_rate": 4.871974561054901e-05, "loss": 1.3963, "step": 4756 }, { "epoch": 0.6814209998567541, "grad_norm": 1.233858585357666, "learning_rate": 4.867991834521487e-05, "loss": 1.4668, "step": 4757 }, { "epoch": 0.6815642458100558, "grad_norm": 1.132620930671692, "learning_rate": 4.864010212782434e-05, "loss": 1.4424, "step": 4758 }, { "epoch": 0.6817074917633577, "grad_norm": 1.2647408246994019, "learning_rate": 4.860029696694886e-05, "loss": 1.352, "step": 4759 }, { "epoch": 0.6818507377166595, "grad_norm": 1.1134819984436035, "learning_rate": 4.8560502871157485e-05, "loss": 1.5445, "step": 4760 }, { "epoch": 0.6819939836699613, "grad_norm": 0.9433553218841553, "learning_rate": 4.852071984901696e-05, "loss": 1.3118, "step": 4761 }, { "epoch": 0.6821372296232632, "grad_norm": 1.1843516826629639, "learning_rate": 4.8480947909091454e-05, "loss": 1.2485, "step": 4762 }, { "epoch": 0.682280475576565, "grad_norm": 1.1037076711654663, "learning_rate": 4.8441187059943015e-05, "loss": 1.4771, "step": 4763 }, { "epoch": 0.6824237215298667, "grad_norm": 1.0096940994262695, "learning_rate": 4.840143731013115e-05, "loss": 1.5292, "step": 4764 }, { "epoch": 0.6825669674831686, "grad_norm": 1.1632788181304932, "learning_rate": 4.836169866821293e-05, "loss": 1.4896, "step": 4765 }, { "epoch": 0.6827102134364704, "grad_norm": 0.9995408654212952, "learning_rate": 4.832197114274314e-05, "loss": 1.3944, "step": 4766 }, { "epoch": 0.6828534593897723, "grad_norm": 0.9000145196914673, "learning_rate": 4.8282254742274126e-05, "loss": 1.4949, "step": 4767 }, { "epoch": 0.6829967053430741, "grad_norm": 1.1202291250228882, "learning_rate": 4.8242549475355844e-05, "loss": 1.3469, "step": 4768 }, { "epoch": 0.6831399512963758, "grad_norm": 1.1277642250061035, "learning_rate": 4.820285535053588e-05, "loss": 1.5005, "step": 4769 }, { "epoch": 0.6832831972496777, "grad_norm": 1.1232801675796509, "learning_rate": 4.816317237635937e-05, "loss": 1.3975, "step": 4770 }, { "epoch": 0.6834264432029795, "grad_norm": 1.0864691734313965, "learning_rate": 4.8123500561369115e-05, "loss": 1.3348, "step": 4771 }, { "epoch": 0.6835696891562814, "grad_norm": 1.14359712600708, "learning_rate": 4.808383991410542e-05, "loss": 1.3851, "step": 4772 }, { "epoch": 0.6837129351095832, "grad_norm": 1.1836166381835938, "learning_rate": 4.804419044310622e-05, "loss": 1.4476, "step": 4773 }, { "epoch": 0.6838561810628849, "grad_norm": 1.291280746459961, "learning_rate": 4.800455215690719e-05, "loss": 1.3281, "step": 4774 }, { "epoch": 0.6839994270161868, "grad_norm": 1.0990039110183716, "learning_rate": 4.7964925064041356e-05, "loss": 1.4437, "step": 4775 }, { "epoch": 0.6841426729694886, "grad_norm": 0.929581344127655, "learning_rate": 4.792530917303948e-05, "loss": 1.4478, "step": 4776 }, { "epoch": 0.6842859189227904, "grad_norm": 1.1568092107772827, "learning_rate": 4.7885704492429894e-05, "loss": 1.3816, "step": 4777 }, { "epoch": 0.6844291648760923, "grad_norm": 1.091101884841919, "learning_rate": 4.7846111030738506e-05, "loss": 1.4976, "step": 4778 }, { "epoch": 0.684572410829394, "grad_norm": 0.9966301321983337, "learning_rate": 4.780652879648884e-05, "loss": 1.5102, "step": 4779 }, { "epoch": 0.6847156567826959, "grad_norm": 1.1916595697402954, "learning_rate": 4.776695779820186e-05, "loss": 1.4293, "step": 4780 }, { "epoch": 0.6848589027359977, "grad_norm": 1.0427385568618774, "learning_rate": 4.772739804439633e-05, "loss": 1.4752, "step": 4781 }, { "epoch": 0.6850021486892995, "grad_norm": 1.01448655128479, "learning_rate": 4.7687849543588505e-05, "loss": 1.4508, "step": 4782 }, { "epoch": 0.6851453946426014, "grad_norm": 1.192375659942627, "learning_rate": 4.764831230429211e-05, "loss": 1.3124, "step": 4783 }, { "epoch": 0.6852886405959032, "grad_norm": 0.9594294428825378, "learning_rate": 4.760878633501853e-05, "loss": 1.3555, "step": 4784 }, { "epoch": 0.685431886549205, "grad_norm": 1.0502291917800903, "learning_rate": 4.756927164427685e-05, "loss": 1.6175, "step": 4785 }, { "epoch": 0.6855751325025068, "grad_norm": 1.211198329925537, "learning_rate": 4.752976824057348e-05, "loss": 1.476, "step": 4786 }, { "epoch": 0.6857183784558086, "grad_norm": 1.0386450290679932, "learning_rate": 4.7490276132412584e-05, "loss": 1.4373, "step": 4787 }, { "epoch": 0.6858616244091105, "grad_norm": 1.0045961141586304, "learning_rate": 4.745079532829582e-05, "loss": 1.2074, "step": 4788 }, { "epoch": 0.6860048703624123, "grad_norm": 1.094271183013916, "learning_rate": 4.741132583672242e-05, "loss": 1.5173, "step": 4789 }, { "epoch": 0.686148116315714, "grad_norm": 1.0525481700897217, "learning_rate": 4.737186766618924e-05, "loss": 1.0464, "step": 4790 }, { "epoch": 0.6862913622690159, "grad_norm": 1.023247480392456, "learning_rate": 4.733242082519054e-05, "loss": 1.3911, "step": 4791 }, { "epoch": 0.6864346082223177, "grad_norm": 1.1460680961608887, "learning_rate": 4.7292985322218355e-05, "loss": 1.3252, "step": 4792 }, { "epoch": 0.6865778541756196, "grad_norm": 1.239241361618042, "learning_rate": 4.7253561165762163e-05, "loss": 1.3775, "step": 4793 }, { "epoch": 0.6867211001289214, "grad_norm": 1.1763759851455688, "learning_rate": 4.721414836430894e-05, "loss": 1.4646, "step": 4794 }, { "epoch": 0.6868643460822231, "grad_norm": 0.8961011171340942, "learning_rate": 4.717474692634334e-05, "loss": 1.4793, "step": 4795 }, { "epoch": 0.687007592035525, "grad_norm": 1.0425609350204468, "learning_rate": 4.713535686034749e-05, "loss": 1.4133, "step": 4796 }, { "epoch": 0.6871508379888268, "grad_norm": 1.2007516622543335, "learning_rate": 4.709597817480112e-05, "loss": 1.547, "step": 4797 }, { "epoch": 0.6872940839421287, "grad_norm": 1.124093770980835, "learning_rate": 4.7056610878181486e-05, "loss": 1.3413, "step": 4798 }, { "epoch": 0.6874373298954305, "grad_norm": 1.3353739976882935, "learning_rate": 4.7017254978963366e-05, "loss": 1.452, "step": 4799 }, { "epoch": 0.6875805758487322, "grad_norm": 1.1405316591262817, "learning_rate": 4.697791048561918e-05, "loss": 1.2804, "step": 4800 }, { "epoch": 0.6877238218020341, "grad_norm": 1.0872316360473633, "learning_rate": 4.693857740661869e-05, "loss": 1.4985, "step": 4801 }, { "epoch": 0.6878670677553359, "grad_norm": 1.1340214014053345, "learning_rate": 4.689925575042946e-05, "loss": 1.1228, "step": 4802 }, { "epoch": 0.6880103137086377, "grad_norm": 1.212959885597229, "learning_rate": 4.685994552551647e-05, "loss": 1.4991, "step": 4803 }, { "epoch": 0.6881535596619396, "grad_norm": 1.1516879796981812, "learning_rate": 4.682064674034216e-05, "loss": 1.4218, "step": 4804 }, { "epoch": 0.6882968056152414, "grad_norm": 1.0645337104797363, "learning_rate": 4.6781359403366576e-05, "loss": 1.4217, "step": 4805 }, { "epoch": 0.6884400515685432, "grad_norm": 1.0520826578140259, "learning_rate": 4.6742083523047434e-05, "loss": 1.3315, "step": 4806 }, { "epoch": 0.688583297521845, "grad_norm": 1.155551552772522, "learning_rate": 4.670281910783974e-05, "loss": 1.5684, "step": 4807 }, { "epoch": 0.6887265434751468, "grad_norm": 1.1492711305618286, "learning_rate": 4.666356616619619e-05, "loss": 1.3786, "step": 4808 }, { "epoch": 0.6888697894284487, "grad_norm": 0.9987225532531738, "learning_rate": 4.662432470656698e-05, "loss": 1.4129, "step": 4809 }, { "epoch": 0.6890130353817505, "grad_norm": 1.320759892463684, "learning_rate": 4.658509473739981e-05, "loss": 1.3739, "step": 4810 }, { "epoch": 0.6891562813350522, "grad_norm": 0.9801371693611145, "learning_rate": 4.6545876267139974e-05, "loss": 1.3402, "step": 4811 }, { "epoch": 0.6892995272883541, "grad_norm": 0.953399121761322, "learning_rate": 4.650666930423012e-05, "loss": 1.51, "step": 4812 }, { "epoch": 0.6894427732416559, "grad_norm": 0.9664946794509888, "learning_rate": 4.646747385711064e-05, "loss": 1.2917, "step": 4813 }, { "epoch": 0.6895860191949578, "grad_norm": 1.1409845352172852, "learning_rate": 4.642828993421936e-05, "loss": 1.3537, "step": 4814 }, { "epoch": 0.6897292651482596, "grad_norm": 1.049520492553711, "learning_rate": 4.638911754399152e-05, "loss": 1.5072, "step": 4815 }, { "epoch": 0.6898725111015613, "grad_norm": 1.217037320137024, "learning_rate": 4.634995669486001e-05, "loss": 1.3279, "step": 4816 }, { "epoch": 0.6900157570548632, "grad_norm": 1.029850959777832, "learning_rate": 4.631080739525518e-05, "loss": 1.1781, "step": 4817 }, { "epoch": 0.690159003008165, "grad_norm": 0.9795482158660889, "learning_rate": 4.6271669653604945e-05, "loss": 1.3603, "step": 4818 }, { "epoch": 0.6903022489614669, "grad_norm": 1.1098555326461792, "learning_rate": 4.623254347833464e-05, "loss": 1.3102, "step": 4819 }, { "epoch": 0.6904454949147687, "grad_norm": 1.0522459745407104, "learning_rate": 4.61934288778672e-05, "loss": 1.3217, "step": 4820 }, { "epoch": 0.6905887408680704, "grad_norm": 0.9890515208244324, "learning_rate": 4.6154325860623037e-05, "loss": 1.3785, "step": 4821 }, { "epoch": 0.6907319868213723, "grad_norm": 0.9646671414375305, "learning_rate": 4.6115234435020016e-05, "loss": 1.4027, "step": 4822 }, { "epoch": 0.6908752327746741, "grad_norm": 1.178161382675171, "learning_rate": 4.607615460947354e-05, "loss": 1.4829, "step": 4823 }, { "epoch": 0.691018478727976, "grad_norm": 1.2263121604919434, "learning_rate": 4.6037086392396654e-05, "loss": 1.4178, "step": 4824 }, { "epoch": 0.6911617246812778, "grad_norm": 1.1153571605682373, "learning_rate": 4.599802979219965e-05, "loss": 1.4907, "step": 4825 }, { "epoch": 0.6913049706345795, "grad_norm": 0.9988410472869873, "learning_rate": 4.5958984817290484e-05, "loss": 1.4451, "step": 4826 }, { "epoch": 0.6914482165878814, "grad_norm": 0.966171383857727, "learning_rate": 4.5919951476074583e-05, "loss": 1.4059, "step": 4827 }, { "epoch": 0.6915914625411832, "grad_norm": 1.328002691268921, "learning_rate": 4.588092977695485e-05, "loss": 1.3912, "step": 4828 }, { "epoch": 0.691734708494485, "grad_norm": 1.2720357179641724, "learning_rate": 4.584191972833175e-05, "loss": 1.4637, "step": 4829 }, { "epoch": 0.6918779544477869, "grad_norm": 1.2456566095352173, "learning_rate": 4.580292133860303e-05, "loss": 1.5103, "step": 4830 }, { "epoch": 0.6920212004010887, "grad_norm": 0.9940177798271179, "learning_rate": 4.576393461616424e-05, "loss": 1.4939, "step": 4831 }, { "epoch": 0.6921644463543905, "grad_norm": 1.3750369548797607, "learning_rate": 4.572495956940821e-05, "loss": 1.5017, "step": 4832 }, { "epoch": 0.6923076923076923, "grad_norm": 1.0775599479675293, "learning_rate": 4.5685996206725253e-05, "loss": 1.3319, "step": 4833 }, { "epoch": 0.6924509382609941, "grad_norm": 1.0888468027114868, "learning_rate": 4.564704453650323e-05, "loss": 1.5101, "step": 4834 }, { "epoch": 0.692594184214296, "grad_norm": 1.3396648168563843, "learning_rate": 4.560810456712754e-05, "loss": 1.5322, "step": 4835 }, { "epoch": 0.6927374301675978, "grad_norm": 1.0036617517471313, "learning_rate": 4.556917630698092e-05, "loss": 1.5268, "step": 4836 }, { "epoch": 0.6928806761208995, "grad_norm": 1.1821216344833374, "learning_rate": 4.553025976444369e-05, "loss": 1.189, "step": 4837 }, { "epoch": 0.6930239220742014, "grad_norm": 1.213313102722168, "learning_rate": 4.5491354947893616e-05, "loss": 1.4063, "step": 4838 }, { "epoch": 0.6931671680275032, "grad_norm": 1.1589694023132324, "learning_rate": 4.545246186570594e-05, "loss": 1.3108, "step": 4839 }, { "epoch": 0.6933104139808051, "grad_norm": 0.9477056264877319, "learning_rate": 4.541358052625342e-05, "loss": 1.6081, "step": 4840 }, { "epoch": 0.6934536599341069, "grad_norm": 0.9669190049171448, "learning_rate": 4.537471093790614e-05, "loss": 1.426, "step": 4841 }, { "epoch": 0.6935969058874086, "grad_norm": 0.9902364611625671, "learning_rate": 4.5335853109031855e-05, "loss": 1.4248, "step": 4842 }, { "epoch": 0.6937401518407105, "grad_norm": 1.1749876737594604, "learning_rate": 4.529700704799571e-05, "loss": 1.5017, "step": 4843 }, { "epoch": 0.6938833977940123, "grad_norm": 1.015008568763733, "learning_rate": 4.525817276316022e-05, "loss": 1.4043, "step": 4844 }, { "epoch": 0.6940266437473142, "grad_norm": 1.004389762878418, "learning_rate": 4.5219350262885484e-05, "loss": 1.5983, "step": 4845 }, { "epoch": 0.694169889700616, "grad_norm": 1.1781011819839478, "learning_rate": 4.518053955552903e-05, "loss": 1.5675, "step": 4846 }, { "epoch": 0.6943131356539177, "grad_norm": 1.3226794004440308, "learning_rate": 4.514174064944584e-05, "loss": 1.3031, "step": 4847 }, { "epoch": 0.6944563816072196, "grad_norm": 0.9350221753120422, "learning_rate": 4.510295355298835e-05, "loss": 1.2796, "step": 4848 }, { "epoch": 0.6945996275605214, "grad_norm": 1.1182013750076294, "learning_rate": 4.506417827450647e-05, "loss": 1.3785, "step": 4849 }, { "epoch": 0.6947428735138232, "grad_norm": 1.0845601558685303, "learning_rate": 4.50254148223476e-05, "loss": 1.1681, "step": 4850 }, { "epoch": 0.6948861194671251, "grad_norm": 0.9920936822891235, "learning_rate": 4.498666320485647e-05, "loss": 1.3027, "step": 4851 }, { "epoch": 0.6950293654204269, "grad_norm": 0.974132776260376, "learning_rate": 4.494792343037535e-05, "loss": 1.5396, "step": 4852 }, { "epoch": 0.6951726113737287, "grad_norm": 0.9596220850944519, "learning_rate": 4.490919550724405e-05, "loss": 1.4654, "step": 4853 }, { "epoch": 0.6953158573270305, "grad_norm": 1.3003628253936768, "learning_rate": 4.487047944379964e-05, "loss": 1.2778, "step": 4854 }, { "epoch": 0.6954591032803323, "grad_norm": 1.043830394744873, "learning_rate": 4.4831775248376764e-05, "loss": 1.4356, "step": 4855 }, { "epoch": 0.6956023492336342, "grad_norm": 1.0722198486328125, "learning_rate": 4.4793082929307474e-05, "loss": 1.4904, "step": 4856 }, { "epoch": 0.695745595186936, "grad_norm": 1.1482727527618408, "learning_rate": 4.4754402494921275e-05, "loss": 1.3148, "step": 4857 }, { "epoch": 0.6958888411402377, "grad_norm": 1.0990558862686157, "learning_rate": 4.471573395354514e-05, "loss": 1.3117, "step": 4858 }, { "epoch": 0.6960320870935396, "grad_norm": 1.0018548965454102, "learning_rate": 4.467707731350332e-05, "loss": 1.4614, "step": 4859 }, { "epoch": 0.6961753330468414, "grad_norm": 1.168513298034668, "learning_rate": 4.4638432583117786e-05, "loss": 1.5093, "step": 4860 }, { "epoch": 0.6963185790001433, "grad_norm": 1.1008967161178589, "learning_rate": 4.4599799770707754e-05, "loss": 1.4191, "step": 4861 }, { "epoch": 0.6964618249534451, "grad_norm": 1.1140096187591553, "learning_rate": 4.4561178884589815e-05, "loss": 1.3664, "step": 4862 }, { "epoch": 0.6966050709067468, "grad_norm": 1.2130491733551025, "learning_rate": 4.452256993307821e-05, "loss": 1.4676, "step": 4863 }, { "epoch": 0.6967483168600487, "grad_norm": 0.9866144061088562, "learning_rate": 4.4483972924484474e-05, "loss": 1.5902, "step": 4864 }, { "epoch": 0.6968915628133505, "grad_norm": 1.0489447116851807, "learning_rate": 4.4445387867117526e-05, "loss": 1.4313, "step": 4865 }, { "epoch": 0.6970348087666524, "grad_norm": 0.9920719861984253, "learning_rate": 4.44068147692838e-05, "loss": 1.4739, "step": 4866 }, { "epoch": 0.6971780547199542, "grad_norm": 0.8992570638656616, "learning_rate": 4.436825363928714e-05, "loss": 1.3678, "step": 4867 }, { "epoch": 0.6973213006732559, "grad_norm": 1.0031867027282715, "learning_rate": 4.43297044854288e-05, "loss": 1.4566, "step": 4868 }, { "epoch": 0.6974645466265578, "grad_norm": 0.9616906642913818, "learning_rate": 4.4291167316007464e-05, "loss": 1.4792, "step": 4869 }, { "epoch": 0.6976077925798596, "grad_norm": 1.168142557144165, "learning_rate": 4.4252642139319226e-05, "loss": 1.2891, "step": 4870 }, { "epoch": 0.6977510385331614, "grad_norm": 1.1153199672698975, "learning_rate": 4.421412896365763e-05, "loss": 1.217, "step": 4871 }, { "epoch": 0.6978942844864633, "grad_norm": 1.0973092317581177, "learning_rate": 4.417562779731355e-05, "loss": 1.6411, "step": 4872 }, { "epoch": 0.6980375304397651, "grad_norm": 1.0448617935180664, "learning_rate": 4.413713864857533e-05, "loss": 1.3711, "step": 4873 }, { "epoch": 0.6981807763930669, "grad_norm": 1.1163080930709839, "learning_rate": 4.409866152572883e-05, "loss": 1.64, "step": 4874 }, { "epoch": 0.6983240223463687, "grad_norm": 1.247266411781311, "learning_rate": 4.406019643705715e-05, "loss": 1.4342, "step": 4875 }, { "epoch": 0.6984672682996705, "grad_norm": 1.0371639728546143, "learning_rate": 4.4021743390840866e-05, "loss": 1.5044, "step": 4876 }, { "epoch": 0.6986105142529724, "grad_norm": 1.0786230564117432, "learning_rate": 4.398330239535797e-05, "loss": 1.5325, "step": 4877 }, { "epoch": 0.6987537602062742, "grad_norm": 1.1284019947052002, "learning_rate": 4.3944873458883887e-05, "loss": 1.5008, "step": 4878 }, { "epoch": 0.698897006159576, "grad_norm": 1.1389825344085693, "learning_rate": 4.390645658969143e-05, "loss": 1.3201, "step": 4879 }, { "epoch": 0.6990402521128778, "grad_norm": 1.1710530519485474, "learning_rate": 4.3868051796050705e-05, "loss": 1.2706, "step": 4880 }, { "epoch": 0.6991834980661796, "grad_norm": 1.2138415575027466, "learning_rate": 4.3829659086229403e-05, "loss": 1.4588, "step": 4881 }, { "epoch": 0.6993267440194815, "grad_norm": 1.1283677816390991, "learning_rate": 4.379127846849255e-05, "loss": 1.5379, "step": 4882 }, { "epoch": 0.6994699899727833, "grad_norm": 1.1636452674865723, "learning_rate": 4.375290995110244e-05, "loss": 1.286, "step": 4883 }, { "epoch": 0.699613235926085, "grad_norm": 1.17005455493927, "learning_rate": 4.371455354231889e-05, "loss": 1.3387, "step": 4884 }, { "epoch": 0.6997564818793869, "grad_norm": 1.1313883066177368, "learning_rate": 4.3676209250399194e-05, "loss": 1.3364, "step": 4885 }, { "epoch": 0.6998997278326887, "grad_norm": 1.0579769611358643, "learning_rate": 4.36378770835978e-05, "loss": 1.5664, "step": 4886 }, { "epoch": 0.7000429737859906, "grad_norm": 1.3555585145950317, "learning_rate": 4.359955705016673e-05, "loss": 1.5081, "step": 4887 }, { "epoch": 0.7001862197392924, "grad_norm": 1.1886733770370483, "learning_rate": 4.356124915835533e-05, "loss": 1.4471, "step": 4888 }, { "epoch": 0.7003294656925941, "grad_norm": 1.4983137845993042, "learning_rate": 4.352295341641035e-05, "loss": 1.2348, "step": 4889 }, { "epoch": 0.700472711645896, "grad_norm": 1.0202289819717407, "learning_rate": 4.348466983257594e-05, "loss": 1.4595, "step": 4890 }, { "epoch": 0.7006159575991978, "grad_norm": 1.1768124103546143, "learning_rate": 4.34463984150935e-05, "loss": 1.3208, "step": 4891 }, { "epoch": 0.7007592035524997, "grad_norm": 1.109227180480957, "learning_rate": 4.3408139172202044e-05, "loss": 1.4597, "step": 4892 }, { "epoch": 0.7009024495058015, "grad_norm": 1.1736289262771606, "learning_rate": 4.336989211213782e-05, "loss": 1.434, "step": 4893 }, { "epoch": 0.7010456954591033, "grad_norm": 0.9314563274383545, "learning_rate": 4.333165724313442e-05, "loss": 1.3474, "step": 4894 }, { "epoch": 0.7011889414124051, "grad_norm": 1.1512372493743896, "learning_rate": 4.329343457342289e-05, "loss": 1.4563, "step": 4895 }, { "epoch": 0.7013321873657069, "grad_norm": 1.0794873237609863, "learning_rate": 4.325522411123162e-05, "loss": 1.3951, "step": 4896 }, { "epoch": 0.7014754333190087, "grad_norm": 1.1541352272033691, "learning_rate": 4.321702586478639e-05, "loss": 1.2084, "step": 4897 }, { "epoch": 0.7016186792723106, "grad_norm": 1.1582107543945312, "learning_rate": 4.3178839842310326e-05, "loss": 1.3965, "step": 4898 }, { "epoch": 0.7017619252256124, "grad_norm": 1.1217702627182007, "learning_rate": 4.3140666052023936e-05, "loss": 1.353, "step": 4899 }, { "epoch": 0.7019051711789142, "grad_norm": 1.0962918996810913, "learning_rate": 4.3102504502145147e-05, "loss": 1.3657, "step": 4900 }, { "epoch": 0.702048417132216, "grad_norm": 0.8652566075325012, "learning_rate": 4.3064355200889094e-05, "loss": 1.2515, "step": 4901 }, { "epoch": 0.7021916630855178, "grad_norm": 0.9534581303596497, "learning_rate": 4.302621815646839e-05, "loss": 1.4601, "step": 4902 }, { "epoch": 0.7023349090388197, "grad_norm": 1.2013819217681885, "learning_rate": 4.298809337709312e-05, "loss": 1.521, "step": 4903 }, { "epoch": 0.7024781549921215, "grad_norm": 0.9915494918823242, "learning_rate": 4.294998087097048e-05, "loss": 1.4599, "step": 4904 }, { "epoch": 0.7026214009454232, "grad_norm": 0.9526614546775818, "learning_rate": 4.2911880646305184e-05, "loss": 1.1174, "step": 4905 }, { "epoch": 0.7027646468987251, "grad_norm": 1.0566946268081665, "learning_rate": 4.2873792711299266e-05, "loss": 1.3874, "step": 4906 }, { "epoch": 0.7029078928520269, "grad_norm": 1.1422756910324097, "learning_rate": 4.283571707415214e-05, "loss": 1.3858, "step": 4907 }, { "epoch": 0.7030511388053288, "grad_norm": 1.0093618631362915, "learning_rate": 4.279765374306055e-05, "loss": 1.612, "step": 4908 }, { "epoch": 0.7031943847586306, "grad_norm": 1.385175108909607, "learning_rate": 4.275960272621852e-05, "loss": 1.4079, "step": 4909 }, { "epoch": 0.7033376307119323, "grad_norm": 0.9692065715789795, "learning_rate": 4.272156403181756e-05, "loss": 1.3884, "step": 4910 }, { "epoch": 0.7034808766652342, "grad_norm": 0.905538022518158, "learning_rate": 4.268353766804649e-05, "loss": 1.3402, "step": 4911 }, { "epoch": 0.703624122618536, "grad_norm": 1.0803182125091553, "learning_rate": 4.2645523643091354e-05, "loss": 1.4981, "step": 4912 }, { "epoch": 0.7037673685718379, "grad_norm": 1.11542809009552, "learning_rate": 4.2607521965135645e-05, "loss": 1.2637, "step": 4913 }, { "epoch": 0.7039106145251397, "grad_norm": 1.3580089807510376, "learning_rate": 4.256953264236029e-05, "loss": 1.4892, "step": 4914 }, { "epoch": 0.7040538604784414, "grad_norm": 1.0030322074890137, "learning_rate": 4.253155568294333e-05, "loss": 1.5783, "step": 4915 }, { "epoch": 0.7041971064317433, "grad_norm": 1.003808617591858, "learning_rate": 4.24935910950603e-05, "loss": 1.3485, "step": 4916 }, { "epoch": 0.7043403523850451, "grad_norm": 1.302235722541809, "learning_rate": 4.245563888688405e-05, "loss": 1.4738, "step": 4917 }, { "epoch": 0.704483598338347, "grad_norm": 1.2146676778793335, "learning_rate": 4.241769906658476e-05, "loss": 1.4636, "step": 4918 }, { "epoch": 0.7046268442916488, "grad_norm": 1.1777825355529785, "learning_rate": 4.2379771642329944e-05, "loss": 1.5306, "step": 4919 }, { "epoch": 0.7047700902449506, "grad_norm": 1.0241835117340088, "learning_rate": 4.234185662228435e-05, "loss": 1.4563, "step": 4920 }, { "epoch": 0.7049133361982524, "grad_norm": 1.1451879739761353, "learning_rate": 4.2303954014610235e-05, "loss": 1.35, "step": 4921 }, { "epoch": 0.7050565821515542, "grad_norm": 1.0630794763565063, "learning_rate": 4.226606382746711e-05, "loss": 1.4199, "step": 4922 }, { "epoch": 0.705199828104856, "grad_norm": 1.169994831085205, "learning_rate": 4.2228186069011696e-05, "loss": 1.4908, "step": 4923 }, { "epoch": 0.7053430740581579, "grad_norm": 1.0153543949127197, "learning_rate": 4.2190320747398206e-05, "loss": 1.5536, "step": 4924 }, { "epoch": 0.7054863200114597, "grad_norm": 1.1942942142486572, "learning_rate": 4.215246787077809e-05, "loss": 1.2849, "step": 4925 }, { "epoch": 0.7056295659647615, "grad_norm": 1.0324727296829224, "learning_rate": 4.2114627447300134e-05, "loss": 1.5281, "step": 4926 }, { "epoch": 0.7057728119180633, "grad_norm": 0.956970751285553, "learning_rate": 4.2076799485110454e-05, "loss": 1.4555, "step": 4927 }, { "epoch": 0.7059160578713651, "grad_norm": 1.093381404876709, "learning_rate": 4.203898399235246e-05, "loss": 1.4237, "step": 4928 }, { "epoch": 0.706059303824667, "grad_norm": 0.9523422718048096, "learning_rate": 4.200118097716694e-05, "loss": 1.4837, "step": 4929 }, { "epoch": 0.7062025497779688, "grad_norm": 1.0700111389160156, "learning_rate": 4.196339044769184e-05, "loss": 1.525, "step": 4930 }, { "epoch": 0.7063457957312705, "grad_norm": 1.2418214082717896, "learning_rate": 4.192561241206262e-05, "loss": 1.4357, "step": 4931 }, { "epoch": 0.7064890416845724, "grad_norm": 1.148093581199646, "learning_rate": 4.1887846878411975e-05, "loss": 1.5205, "step": 4932 }, { "epoch": 0.7066322876378742, "grad_norm": 1.325565218925476, "learning_rate": 4.18500938548698e-05, "loss": 1.5901, "step": 4933 }, { "epoch": 0.7067755335911761, "grad_norm": 1.026126503944397, "learning_rate": 4.181235334956341e-05, "loss": 1.4371, "step": 4934 }, { "epoch": 0.7069187795444779, "grad_norm": 0.9592928290367126, "learning_rate": 4.1774625370617484e-05, "loss": 1.7092, "step": 4935 }, { "epoch": 0.7070620254977796, "grad_norm": 1.3618437051773071, "learning_rate": 4.173690992615382e-05, "loss": 1.261, "step": 4936 }, { "epoch": 0.7072052714510815, "grad_norm": 1.0414167642593384, "learning_rate": 4.1699207024291666e-05, "loss": 1.5409, "step": 4937 }, { "epoch": 0.7073485174043833, "grad_norm": 1.0105513334274292, "learning_rate": 4.1661516673147517e-05, "loss": 1.4434, "step": 4938 }, { "epoch": 0.7074917633576852, "grad_norm": 1.0188097953796387, "learning_rate": 4.162383888083519e-05, "loss": 1.4705, "step": 4939 }, { "epoch": 0.707635009310987, "grad_norm": 1.1254373788833618, "learning_rate": 4.1586173655465785e-05, "loss": 1.5872, "step": 4940 }, { "epoch": 0.7077782552642888, "grad_norm": 1.2008291482925415, "learning_rate": 4.154852100514761e-05, "loss": 1.6866, "step": 4941 }, { "epoch": 0.7079215012175906, "grad_norm": 1.146675944328308, "learning_rate": 4.151088093798644e-05, "loss": 1.3855, "step": 4942 }, { "epoch": 0.7080647471708924, "grad_norm": 1.0632609128952026, "learning_rate": 4.147325346208527e-05, "loss": 1.3466, "step": 4943 }, { "epoch": 0.7082079931241942, "grad_norm": 0.9820842742919922, "learning_rate": 4.143563858554428e-05, "loss": 1.4555, "step": 4944 }, { "epoch": 0.7083512390774961, "grad_norm": 1.0430525541305542, "learning_rate": 4.1398036316461054e-05, "loss": 1.4242, "step": 4945 }, { "epoch": 0.7084944850307979, "grad_norm": 1.210944414138794, "learning_rate": 4.136044666293044e-05, "loss": 1.2304, "step": 4946 }, { "epoch": 0.7086377309840997, "grad_norm": 0.9258332252502441, "learning_rate": 4.132286963304456e-05, "loss": 1.3622, "step": 4947 }, { "epoch": 0.7087809769374015, "grad_norm": 1.2679623365402222, "learning_rate": 4.1285305234892803e-05, "loss": 1.5023, "step": 4948 }, { "epoch": 0.7089242228907033, "grad_norm": 1.226791262626648, "learning_rate": 4.124775347656187e-05, "loss": 1.2274, "step": 4949 }, { "epoch": 0.7090674688440052, "grad_norm": 1.104860544204712, "learning_rate": 4.121021436613576e-05, "loss": 1.3914, "step": 4950 }, { "epoch": 0.709210714797307, "grad_norm": 1.2253968715667725, "learning_rate": 4.117268791169564e-05, "loss": 1.2296, "step": 4951 }, { "epoch": 0.7093539607506087, "grad_norm": 1.125827670097351, "learning_rate": 4.113517412132003e-05, "loss": 1.2573, "step": 4952 }, { "epoch": 0.7094972067039106, "grad_norm": 0.9368603825569153, "learning_rate": 4.109767300308481e-05, "loss": 1.5181, "step": 4953 }, { "epoch": 0.7096404526572124, "grad_norm": 1.0678833723068237, "learning_rate": 4.1060184565062963e-05, "loss": 1.4978, "step": 4954 }, { "epoch": 0.7097836986105143, "grad_norm": 0.97263503074646, "learning_rate": 4.102270881532485e-05, "loss": 1.4448, "step": 4955 }, { "epoch": 0.7099269445638161, "grad_norm": 1.110864281654358, "learning_rate": 4.0985245761938064e-05, "loss": 1.4616, "step": 4956 }, { "epoch": 0.7100701905171178, "grad_norm": 1.060823678970337, "learning_rate": 4.094779541296747e-05, "loss": 1.4392, "step": 4957 }, { "epoch": 0.7102134364704197, "grad_norm": 1.0522089004516602, "learning_rate": 4.091035777647525e-05, "loss": 1.5224, "step": 4958 }, { "epoch": 0.7103566824237215, "grad_norm": 1.0164978504180908, "learning_rate": 4.087293286052069e-05, "loss": 1.4234, "step": 4959 }, { "epoch": 0.7104999283770234, "grad_norm": 1.5885467529296875, "learning_rate": 4.083552067316054e-05, "loss": 1.4112, "step": 4960 }, { "epoch": 0.7106431743303252, "grad_norm": 1.0973089933395386, "learning_rate": 4.079812122244874e-05, "loss": 1.2094, "step": 4961 }, { "epoch": 0.710786420283627, "grad_norm": 1.2338813543319702, "learning_rate": 4.076073451643639e-05, "loss": 1.2646, "step": 4962 }, { "epoch": 0.7109296662369288, "grad_norm": 1.263325572013855, "learning_rate": 4.0723360563171906e-05, "loss": 1.5651, "step": 4963 }, { "epoch": 0.7110729121902306, "grad_norm": 1.002611756324768, "learning_rate": 4.068599937070111e-05, "loss": 1.2438, "step": 4964 }, { "epoch": 0.7112161581435325, "grad_norm": 1.083384394645691, "learning_rate": 4.064865094706681e-05, "loss": 1.3277, "step": 4965 }, { "epoch": 0.7113594040968343, "grad_norm": 1.0699213743209839, "learning_rate": 4.0611315300309246e-05, "loss": 1.5035, "step": 4966 }, { "epoch": 0.7115026500501361, "grad_norm": 0.9197946786880493, "learning_rate": 4.0573992438465866e-05, "loss": 1.5064, "step": 4967 }, { "epoch": 0.7116458960034379, "grad_norm": 1.0107725858688354, "learning_rate": 4.053668236957134e-05, "loss": 1.2366, "step": 4968 }, { "epoch": 0.7117891419567397, "grad_norm": 1.0047035217285156, "learning_rate": 4.049938510165765e-05, "loss": 1.3793, "step": 4969 }, { "epoch": 0.7119323879100415, "grad_norm": 1.089130163192749, "learning_rate": 4.0462100642753856e-05, "loss": 1.3201, "step": 4970 }, { "epoch": 0.7120756338633434, "grad_norm": 1.0256634950637817, "learning_rate": 4.04248290008865e-05, "loss": 1.4263, "step": 4971 }, { "epoch": 0.7122188798166452, "grad_norm": 0.9777927994728088, "learning_rate": 4.038757018407924e-05, "loss": 1.3401, "step": 4972 }, { "epoch": 0.712362125769947, "grad_norm": 1.1109718084335327, "learning_rate": 4.035032420035291e-05, "loss": 1.2865, "step": 4973 }, { "epoch": 0.7125053717232488, "grad_norm": 1.0624184608459473, "learning_rate": 4.031309105772567e-05, "loss": 1.45, "step": 4974 }, { "epoch": 0.7126486176765506, "grad_norm": 1.1249079704284668, "learning_rate": 4.027587076421291e-05, "loss": 1.5996, "step": 4975 }, { "epoch": 0.7127918636298525, "grad_norm": 1.3260663747787476, "learning_rate": 4.023866332782723e-05, "loss": 1.3964, "step": 4976 }, { "epoch": 0.7129351095831543, "grad_norm": 1.0054802894592285, "learning_rate": 4.0201468756578465e-05, "loss": 1.435, "step": 4977 }, { "epoch": 0.713078355536456, "grad_norm": 1.0342075824737549, "learning_rate": 4.01642870584737e-05, "loss": 1.433, "step": 4978 }, { "epoch": 0.7132216014897579, "grad_norm": 1.0962247848510742, "learning_rate": 4.012711824151727e-05, "loss": 1.4263, "step": 4979 }, { "epoch": 0.7133648474430597, "grad_norm": 1.3745440244674683, "learning_rate": 4.008996231371062e-05, "loss": 1.4615, "step": 4980 }, { "epoch": 0.7135080933963616, "grad_norm": 1.053774118423462, "learning_rate": 4.005281928305249e-05, "loss": 1.5309, "step": 4981 }, { "epoch": 0.7136513393496634, "grad_norm": 0.9896112084388733, "learning_rate": 4.0015689157538994e-05, "loss": 1.6359, "step": 4982 }, { "epoch": 0.7137945853029652, "grad_norm": 0.9917699098587036, "learning_rate": 3.997857194516319e-05, "loss": 1.4501, "step": 4983 }, { "epoch": 0.713937831256267, "grad_norm": 1.0307033061981201, "learning_rate": 3.994146765391557e-05, "loss": 1.4745, "step": 4984 }, { "epoch": 0.7140810772095688, "grad_norm": 1.1613492965698242, "learning_rate": 3.990437629178372e-05, "loss": 1.3274, "step": 4985 }, { "epoch": 0.7142243231628707, "grad_norm": 1.281091332435608, "learning_rate": 3.986729786675253e-05, "loss": 1.5138, "step": 4986 }, { "epoch": 0.7143675691161725, "grad_norm": 1.1112534999847412, "learning_rate": 3.983023238680406e-05, "loss": 1.3925, "step": 4987 }, { "epoch": 0.7145108150694743, "grad_norm": 0.9237897396087646, "learning_rate": 3.9793179859917585e-05, "loss": 1.2174, "step": 4988 }, { "epoch": 0.7146540610227761, "grad_norm": 1.222542405128479, "learning_rate": 3.97561402940696e-05, "loss": 1.3699, "step": 4989 }, { "epoch": 0.7147973069760779, "grad_norm": 0.8941904902458191, "learning_rate": 3.9719113697233835e-05, "loss": 1.4045, "step": 4990 }, { "epoch": 0.7149405529293797, "grad_norm": 0.9991548657417297, "learning_rate": 3.96821000773811e-05, "loss": 1.4922, "step": 4991 }, { "epoch": 0.7150837988826816, "grad_norm": 1.3762160539627075, "learning_rate": 3.964509944247962e-05, "loss": 1.5094, "step": 4992 }, { "epoch": 0.7152270448359834, "grad_norm": 1.1509665250778198, "learning_rate": 3.9608111800494715e-05, "loss": 1.5775, "step": 4993 }, { "epoch": 0.7153702907892852, "grad_norm": 1.111878514289856, "learning_rate": 3.957113715938884e-05, "loss": 1.5462, "step": 4994 }, { "epoch": 0.715513536742587, "grad_norm": 1.0730425119400024, "learning_rate": 3.953417552712174e-05, "loss": 1.4371, "step": 4995 }, { "epoch": 0.7156567826958888, "grad_norm": 1.0492804050445557, "learning_rate": 3.949722691165036e-05, "loss": 1.3903, "step": 4996 }, { "epoch": 0.7158000286491907, "grad_norm": 1.172573208808899, "learning_rate": 3.9460291320928825e-05, "loss": 1.4616, "step": 4997 }, { "epoch": 0.7159432746024925, "grad_norm": 1.247603178024292, "learning_rate": 3.942336876290843e-05, "loss": 1.4334, "step": 4998 }, { "epoch": 0.7160865205557942, "grad_norm": 1.048155426979065, "learning_rate": 3.93864592455377e-05, "loss": 1.5198, "step": 4999 }, { "epoch": 0.7162297665090961, "grad_norm": 0.9921081066131592, "learning_rate": 3.9349562776762385e-05, "loss": 1.3085, "step": 5000 }, { "epoch": 0.7163730124623979, "grad_norm": 1.171191930770874, "learning_rate": 3.9312679364525294e-05, "loss": 1.2653, "step": 5001 }, { "epoch": 0.7165162584156998, "grad_norm": 1.1787899732589722, "learning_rate": 3.927580901676653e-05, "loss": 1.4823, "step": 5002 }, { "epoch": 0.7166595043690016, "grad_norm": 1.0939325094223022, "learning_rate": 3.923895174142347e-05, "loss": 1.3794, "step": 5003 }, { "epoch": 0.7168027503223035, "grad_norm": 0.9014527797698975, "learning_rate": 3.9202107546430456e-05, "loss": 1.3886, "step": 5004 }, { "epoch": 0.7169459962756052, "grad_norm": 0.928516149520874, "learning_rate": 3.916527643971917e-05, "loss": 1.4484, "step": 5005 }, { "epoch": 0.717089242228907, "grad_norm": 1.04092538356781, "learning_rate": 3.912845842921844e-05, "loss": 1.3436, "step": 5006 }, { "epoch": 0.7172324881822089, "grad_norm": 1.0407545566558838, "learning_rate": 3.909165352285429e-05, "loss": 1.3823, "step": 5007 }, { "epoch": 0.7173757341355107, "grad_norm": 0.9373136758804321, "learning_rate": 3.905486172854991e-05, "loss": 1.3684, "step": 5008 }, { "epoch": 0.7175189800888125, "grad_norm": 0.9039185047149658, "learning_rate": 3.9018083054225564e-05, "loss": 1.4995, "step": 5009 }, { "epoch": 0.7176622260421143, "grad_norm": 1.133274793624878, "learning_rate": 3.898131750779892e-05, "loss": 1.2873, "step": 5010 }, { "epoch": 0.7178054719954161, "grad_norm": 0.9841527938842773, "learning_rate": 3.894456509718466e-05, "loss": 1.3205, "step": 5011 }, { "epoch": 0.717948717948718, "grad_norm": 0.927627682685852, "learning_rate": 3.890782583029462e-05, "loss": 1.3358, "step": 5012 }, { "epoch": 0.7180919639020198, "grad_norm": 1.21440851688385, "learning_rate": 3.887109971503785e-05, "loss": 1.5089, "step": 5013 }, { "epoch": 0.7182352098553216, "grad_norm": 0.9294370412826538, "learning_rate": 3.883438675932067e-05, "loss": 1.3146, "step": 5014 }, { "epoch": 0.7183784558086234, "grad_norm": 0.9559040069580078, "learning_rate": 3.8797686971046375e-05, "loss": 1.548, "step": 5015 }, { "epoch": 0.7185217017619252, "grad_norm": 0.9770917892456055, "learning_rate": 3.876100035811555e-05, "loss": 1.3953, "step": 5016 }, { "epoch": 0.718664947715227, "grad_norm": 1.0446397066116333, "learning_rate": 3.872432692842591e-05, "loss": 1.6165, "step": 5017 }, { "epoch": 0.7188081936685289, "grad_norm": 1.059687852859497, "learning_rate": 3.868766668987236e-05, "loss": 1.4572, "step": 5018 }, { "epoch": 0.7189514396218307, "grad_norm": 1.1223117113113403, "learning_rate": 3.8651019650346955e-05, "loss": 1.4853, "step": 5019 }, { "epoch": 0.7190946855751325, "grad_norm": 1.1878575086593628, "learning_rate": 3.8614385817738794e-05, "loss": 1.485, "step": 5020 }, { "epoch": 0.7192379315284343, "grad_norm": 1.1482911109924316, "learning_rate": 3.8577765199934354e-05, "loss": 1.4172, "step": 5021 }, { "epoch": 0.7193811774817361, "grad_norm": 1.0261939764022827, "learning_rate": 3.854115780481713e-05, "loss": 1.515, "step": 5022 }, { "epoch": 0.719524423435038, "grad_norm": 1.0533255338668823, "learning_rate": 3.8504563640267735e-05, "loss": 1.521, "step": 5023 }, { "epoch": 0.7196676693883398, "grad_norm": 1.0538556575775146, "learning_rate": 3.8467982714163994e-05, "loss": 1.561, "step": 5024 }, { "epoch": 0.7198109153416415, "grad_norm": 1.0932462215423584, "learning_rate": 3.843141503438091e-05, "loss": 1.3337, "step": 5025 }, { "epoch": 0.7199541612949434, "grad_norm": 0.9990554451942444, "learning_rate": 3.839486060879057e-05, "loss": 1.2824, "step": 5026 }, { "epoch": 0.7200974072482452, "grad_norm": 1.1891356706619263, "learning_rate": 3.8358319445262256e-05, "loss": 1.2647, "step": 5027 }, { "epoch": 0.7202406532015471, "grad_norm": 1.3223845958709717, "learning_rate": 3.832179155166238e-05, "loss": 1.3523, "step": 5028 }, { "epoch": 0.7203838991548489, "grad_norm": 1.1276562213897705, "learning_rate": 3.828527693585451e-05, "loss": 1.3522, "step": 5029 }, { "epoch": 0.7205271451081507, "grad_norm": 1.06482994556427, "learning_rate": 3.8248775605699285e-05, "loss": 1.3855, "step": 5030 }, { "epoch": 0.7206703910614525, "grad_norm": 1.0633624792099, "learning_rate": 3.821228756905452e-05, "loss": 1.3052, "step": 5031 }, { "epoch": 0.7208136370147543, "grad_norm": 1.0099753141403198, "learning_rate": 3.817581283377532e-05, "loss": 1.3413, "step": 5032 }, { "epoch": 0.7209568829680562, "grad_norm": 1.0035628080368042, "learning_rate": 3.813935140771365e-05, "loss": 1.1599, "step": 5033 }, { "epoch": 0.721100128921358, "grad_norm": 1.297598123550415, "learning_rate": 3.810290329871882e-05, "loss": 1.2384, "step": 5034 }, { "epoch": 0.7212433748746598, "grad_norm": 1.1277319192886353, "learning_rate": 3.8066468514637186e-05, "loss": 1.3573, "step": 5035 }, { "epoch": 0.7213866208279616, "grad_norm": 1.2562358379364014, "learning_rate": 3.803004706331228e-05, "loss": 1.544, "step": 5036 }, { "epoch": 0.7215298667812634, "grad_norm": 1.0702641010284424, "learning_rate": 3.7993638952584744e-05, "loss": 1.3725, "step": 5037 }, { "epoch": 0.7216731127345652, "grad_norm": 1.4319472312927246, "learning_rate": 3.7957244190292264e-05, "loss": 1.4949, "step": 5038 }, { "epoch": 0.7218163586878671, "grad_norm": 1.0116281509399414, "learning_rate": 3.792086278426982e-05, "loss": 1.3818, "step": 5039 }, { "epoch": 0.7219596046411689, "grad_norm": 1.0370063781738281, "learning_rate": 3.788449474234943e-05, "loss": 1.4451, "step": 5040 }, { "epoch": 0.7221028505944707, "grad_norm": 1.1651767492294312, "learning_rate": 3.784814007236016e-05, "loss": 1.4135, "step": 5041 }, { "epoch": 0.7222460965477725, "grad_norm": 1.0801609754562378, "learning_rate": 3.781179878212829e-05, "loss": 1.3637, "step": 5042 }, { "epoch": 0.7223893425010743, "grad_norm": 1.114863634109497, "learning_rate": 3.777547087947729e-05, "loss": 1.3898, "step": 5043 }, { "epoch": 0.7225325884543762, "grad_norm": 1.1830052137374878, "learning_rate": 3.773915637222756e-05, "loss": 1.3423, "step": 5044 }, { "epoch": 0.722675834407678, "grad_norm": 1.0711772441864014, "learning_rate": 3.770285526819674e-05, "loss": 1.3016, "step": 5045 }, { "epoch": 0.7228190803609797, "grad_norm": 1.1650713682174683, "learning_rate": 3.766656757519956e-05, "loss": 1.3565, "step": 5046 }, { "epoch": 0.7229623263142816, "grad_norm": 0.9908804297447205, "learning_rate": 3.763029330104788e-05, "loss": 1.3839, "step": 5047 }, { "epoch": 0.7231055722675834, "grad_norm": 1.3818883895874023, "learning_rate": 3.759403245355068e-05, "loss": 1.2427, "step": 5048 }, { "epoch": 0.7232488182208853, "grad_norm": 0.8899428248405457, "learning_rate": 3.7557785040513905e-05, "loss": 1.2043, "step": 5049 }, { "epoch": 0.7233920641741871, "grad_norm": 1.0241647958755493, "learning_rate": 3.752155106974085e-05, "loss": 1.4297, "step": 5050 }, { "epoch": 0.723535310127489, "grad_norm": 1.2215874195098877, "learning_rate": 3.7485330549031775e-05, "loss": 1.3857, "step": 5051 }, { "epoch": 0.7236785560807907, "grad_norm": 1.0548607110977173, "learning_rate": 3.744912348618399e-05, "loss": 1.4524, "step": 5052 }, { "epoch": 0.7238218020340925, "grad_norm": 1.242562174797058, "learning_rate": 3.741292988899204e-05, "loss": 1.5793, "step": 5053 }, { "epoch": 0.7239650479873944, "grad_norm": 0.9969272613525391, "learning_rate": 3.737674976524749e-05, "loss": 1.4581, "step": 5054 }, { "epoch": 0.7241082939406962, "grad_norm": 0.9887746572494507, "learning_rate": 3.734058312273904e-05, "loss": 1.4328, "step": 5055 }, { "epoch": 0.724251539893998, "grad_norm": 1.2633026838302612, "learning_rate": 3.730442996925245e-05, "loss": 1.5669, "step": 5056 }, { "epoch": 0.7243947858472998, "grad_norm": 1.0923923254013062, "learning_rate": 3.726829031257062e-05, "loss": 1.315, "step": 5057 }, { "epoch": 0.7245380318006016, "grad_norm": 1.240658164024353, "learning_rate": 3.723216416047355e-05, "loss": 1.352, "step": 5058 }, { "epoch": 0.7246812777539035, "grad_norm": 0.872370183467865, "learning_rate": 3.7196051520738204e-05, "loss": 1.5188, "step": 5059 }, { "epoch": 0.7248245237072053, "grad_norm": 1.0220781564712524, "learning_rate": 3.7159952401138844e-05, "loss": 1.3101, "step": 5060 }, { "epoch": 0.7249677696605071, "grad_norm": 1.0093969106674194, "learning_rate": 3.712386680944672e-05, "loss": 1.4861, "step": 5061 }, { "epoch": 0.7251110156138089, "grad_norm": 1.1113320589065552, "learning_rate": 3.708779475343009e-05, "loss": 1.4861, "step": 5062 }, { "epoch": 0.7252542615671107, "grad_norm": 1.0081902742385864, "learning_rate": 3.705173624085438e-05, "loss": 1.3892, "step": 5063 }, { "epoch": 0.7253975075204125, "grad_norm": 1.1471179723739624, "learning_rate": 3.70156912794822e-05, "loss": 1.1921, "step": 5064 }, { "epoch": 0.7255407534737144, "grad_norm": 1.2230408191680908, "learning_rate": 3.6979659877073034e-05, "loss": 1.2967, "step": 5065 }, { "epoch": 0.7256839994270162, "grad_norm": 1.2109131813049316, "learning_rate": 3.694364204138359e-05, "loss": 1.4671, "step": 5066 }, { "epoch": 0.725827245380318, "grad_norm": 1.0959734916687012, "learning_rate": 3.6907637780167604e-05, "loss": 1.3604, "step": 5067 }, { "epoch": 0.7259704913336198, "grad_norm": 1.0213323831558228, "learning_rate": 3.687164710117592e-05, "loss": 1.3356, "step": 5068 }, { "epoch": 0.7261137372869216, "grad_norm": 1.0476126670837402, "learning_rate": 3.6835670012156456e-05, "loss": 1.4403, "step": 5069 }, { "epoch": 0.7262569832402235, "grad_norm": 1.0093845129013062, "learning_rate": 3.6799706520854094e-05, "loss": 1.5003, "step": 5070 }, { "epoch": 0.7264002291935253, "grad_norm": 1.210913062095642, "learning_rate": 3.6763756635010993e-05, "loss": 1.5885, "step": 5071 }, { "epoch": 0.7265434751468272, "grad_norm": 0.9551475644111633, "learning_rate": 3.672782036236627e-05, "loss": 1.4741, "step": 5072 }, { "epoch": 0.7266867211001289, "grad_norm": 1.0842825174331665, "learning_rate": 3.669189771065604e-05, "loss": 1.4197, "step": 5073 }, { "epoch": 0.7268299670534307, "grad_norm": 1.1217559576034546, "learning_rate": 3.6655988687613605e-05, "loss": 1.4862, "step": 5074 }, { "epoch": 0.7269732130067326, "grad_norm": 1.084631323814392, "learning_rate": 3.6620093300969284e-05, "loss": 1.3352, "step": 5075 }, { "epoch": 0.7271164589600344, "grad_norm": 1.1429111957550049, "learning_rate": 3.658421155845047e-05, "loss": 1.5375, "step": 5076 }, { "epoch": 0.7272597049133362, "grad_norm": 1.1932175159454346, "learning_rate": 3.654834346778161e-05, "loss": 1.3544, "step": 5077 }, { "epoch": 0.727402950866638, "grad_norm": 1.2409740686416626, "learning_rate": 3.651248903668421e-05, "loss": 1.2834, "step": 5078 }, { "epoch": 0.7275461968199398, "grad_norm": 1.1142911911010742, "learning_rate": 3.647664827287688e-05, "loss": 1.2448, "step": 5079 }, { "epoch": 0.7276894427732417, "grad_norm": 1.0831637382507324, "learning_rate": 3.644082118407519e-05, "loss": 1.5494, "step": 5080 }, { "epoch": 0.7278326887265435, "grad_norm": 1.108092188835144, "learning_rate": 3.640500777799182e-05, "loss": 1.309, "step": 5081 }, { "epoch": 0.7279759346798453, "grad_norm": 1.0170727968215942, "learning_rate": 3.636920806233661e-05, "loss": 1.4854, "step": 5082 }, { "epoch": 0.7281191806331471, "grad_norm": 1.1278061866760254, "learning_rate": 3.6333422044816256e-05, "loss": 1.3182, "step": 5083 }, { "epoch": 0.7282624265864489, "grad_norm": 1.0599029064178467, "learning_rate": 3.629764973313463e-05, "loss": 1.4213, "step": 5084 }, { "epoch": 0.7284056725397507, "grad_norm": 1.0957692861557007, "learning_rate": 3.626189113499262e-05, "loss": 1.4757, "step": 5085 }, { "epoch": 0.7285489184930526, "grad_norm": 1.0854219198226929, "learning_rate": 3.622614625808819e-05, "loss": 1.4341, "step": 5086 }, { "epoch": 0.7286921644463544, "grad_norm": 1.1914632320404053, "learning_rate": 3.6190415110116336e-05, "loss": 1.207, "step": 5087 }, { "epoch": 0.7288354103996562, "grad_norm": 0.8960670828819275, "learning_rate": 3.6154697698769005e-05, "loss": 1.5671, "step": 5088 }, { "epoch": 0.728978656352958, "grad_norm": 1.4607712030410767, "learning_rate": 3.6118994031735365e-05, "loss": 1.3311, "step": 5089 }, { "epoch": 0.7291219023062598, "grad_norm": 1.1879698038101196, "learning_rate": 3.6083304116701535e-05, "loss": 1.4848, "step": 5090 }, { "epoch": 0.7292651482595617, "grad_norm": 1.1448615789413452, "learning_rate": 3.604762796135059e-05, "loss": 1.3068, "step": 5091 }, { "epoch": 0.7294083942128635, "grad_norm": 1.0488063097000122, "learning_rate": 3.601196557336275e-05, "loss": 1.2553, "step": 5092 }, { "epoch": 0.7295516401661654, "grad_norm": 1.0632097721099854, "learning_rate": 3.5976316960415334e-05, "loss": 1.3257, "step": 5093 }, { "epoch": 0.7296948861194671, "grad_norm": 1.0820939540863037, "learning_rate": 3.594068213018249e-05, "loss": 1.4148, "step": 5094 }, { "epoch": 0.7298381320727689, "grad_norm": 1.1446694135665894, "learning_rate": 3.590506109033558e-05, "loss": 1.4865, "step": 5095 }, { "epoch": 0.7299813780260708, "grad_norm": 1.1402205228805542, "learning_rate": 3.58694538485429e-05, "loss": 1.4474, "step": 5096 }, { "epoch": 0.7301246239793726, "grad_norm": 1.3481671810150146, "learning_rate": 3.583386041246982e-05, "loss": 1.6087, "step": 5097 }, { "epoch": 0.7302678699326745, "grad_norm": 1.242810606956482, "learning_rate": 3.579828078977877e-05, "loss": 1.2383, "step": 5098 }, { "epoch": 0.7304111158859762, "grad_norm": 1.1956006288528442, "learning_rate": 3.576271498812904e-05, "loss": 1.4584, "step": 5099 }, { "epoch": 0.730554361839278, "grad_norm": 1.2181980609893799, "learning_rate": 3.572716301517719e-05, "loss": 1.2328, "step": 5100 }, { "epoch": 0.7306976077925799, "grad_norm": 1.0998504161834717, "learning_rate": 3.5691624878576666e-05, "loss": 1.5332, "step": 5101 }, { "epoch": 0.7308408537458817, "grad_norm": 1.0666316747665405, "learning_rate": 3.565610058597789e-05, "loss": 1.4041, "step": 5102 }, { "epoch": 0.7309840996991835, "grad_norm": 1.1092159748077393, "learning_rate": 3.56205901450284e-05, "loss": 1.3183, "step": 5103 }, { "epoch": 0.7311273456524853, "grad_norm": 0.8322303295135498, "learning_rate": 3.5585093563372706e-05, "loss": 1.4843, "step": 5104 }, { "epoch": 0.7312705916057871, "grad_norm": 1.0714200735092163, "learning_rate": 3.5549610848652335e-05, "loss": 1.4965, "step": 5105 }, { "epoch": 0.731413837559089, "grad_norm": 1.228163480758667, "learning_rate": 3.551414200850586e-05, "loss": 1.5391, "step": 5106 }, { "epoch": 0.7315570835123908, "grad_norm": 1.0059279203414917, "learning_rate": 3.547868705056882e-05, "loss": 1.3441, "step": 5107 }, { "epoch": 0.7317003294656926, "grad_norm": 1.0050758123397827, "learning_rate": 3.544324598247386e-05, "loss": 1.2361, "step": 5108 }, { "epoch": 0.7318435754189944, "grad_norm": 1.0628576278686523, "learning_rate": 3.540781881185046e-05, "loss": 1.5581, "step": 5109 }, { "epoch": 0.7319868213722962, "grad_norm": 0.9868745803833008, "learning_rate": 3.537240554632523e-05, "loss": 1.253, "step": 5110 }, { "epoch": 0.732130067325598, "grad_norm": 1.0466634035110474, "learning_rate": 3.533700619352187e-05, "loss": 1.2713, "step": 5111 }, { "epoch": 0.7322733132788999, "grad_norm": 1.196458339691162, "learning_rate": 3.530162076106088e-05, "loss": 1.2757, "step": 5112 }, { "epoch": 0.7324165592322017, "grad_norm": 1.0627546310424805, "learning_rate": 3.52662492565599e-05, "loss": 1.2646, "step": 5113 }, { "epoch": 0.7325598051855035, "grad_norm": 1.017972469329834, "learning_rate": 3.523089168763355e-05, "loss": 1.3142, "step": 5114 }, { "epoch": 0.7327030511388053, "grad_norm": 0.9428439736366272, "learning_rate": 3.519554806189343e-05, "loss": 1.4131, "step": 5115 }, { "epoch": 0.7328462970921071, "grad_norm": 1.3508042097091675, "learning_rate": 3.516021838694815e-05, "loss": 1.378, "step": 5116 }, { "epoch": 0.732989543045409, "grad_norm": 1.1308386325836182, "learning_rate": 3.512490267040333e-05, "loss": 1.2781, "step": 5117 }, { "epoch": 0.7331327889987108, "grad_norm": 1.1428152322769165, "learning_rate": 3.5089600919861535e-05, "loss": 1.1993, "step": 5118 }, { "epoch": 0.7332760349520127, "grad_norm": 1.0723371505737305, "learning_rate": 3.505431314292243e-05, "loss": 1.4097, "step": 5119 }, { "epoch": 0.7334192809053144, "grad_norm": 1.0461530685424805, "learning_rate": 3.501903934718247e-05, "loss": 1.4394, "step": 5120 }, { "epoch": 0.7335625268586162, "grad_norm": 1.0470033884048462, "learning_rate": 3.498377954023534e-05, "loss": 1.5315, "step": 5121 }, { "epoch": 0.7337057728119181, "grad_norm": 1.0908406972885132, "learning_rate": 3.4948533729671616e-05, "loss": 1.2682, "step": 5122 }, { "epoch": 0.7338490187652199, "grad_norm": 1.1356652975082397, "learning_rate": 3.4913301923078765e-05, "loss": 1.3389, "step": 5123 }, { "epoch": 0.7339922647185217, "grad_norm": 0.9959984421730042, "learning_rate": 3.4878084128041366e-05, "loss": 1.4553, "step": 5124 }, { "epoch": 0.7341355106718235, "grad_norm": 1.1513054370880127, "learning_rate": 3.484288035214095e-05, "loss": 1.3737, "step": 5125 }, { "epoch": 0.7342787566251253, "grad_norm": 1.2114496231079102, "learning_rate": 3.480769060295599e-05, "loss": 1.4182, "step": 5126 }, { "epoch": 0.7344220025784272, "grad_norm": 1.0501726865768433, "learning_rate": 3.477251488806199e-05, "loss": 1.3188, "step": 5127 }, { "epoch": 0.734565248531729, "grad_norm": 1.175553560256958, "learning_rate": 3.47373532150314e-05, "loss": 1.4866, "step": 5128 }, { "epoch": 0.7347084944850308, "grad_norm": 1.1490381956100464, "learning_rate": 3.47022055914337e-05, "loss": 1.4673, "step": 5129 }, { "epoch": 0.7348517404383326, "grad_norm": 0.9892501831054688, "learning_rate": 3.466707202483523e-05, "loss": 1.462, "step": 5130 }, { "epoch": 0.7349949863916344, "grad_norm": 1.251485824584961, "learning_rate": 3.463195252279939e-05, "loss": 1.4227, "step": 5131 }, { "epoch": 0.7351382323449362, "grad_norm": 1.1222338676452637, "learning_rate": 3.459684709288663e-05, "loss": 1.4306, "step": 5132 }, { "epoch": 0.7352814782982381, "grad_norm": 1.1060682535171509, "learning_rate": 3.456175574265418e-05, "loss": 1.4484, "step": 5133 }, { "epoch": 0.7354247242515399, "grad_norm": 1.0340923070907593, "learning_rate": 3.452667847965636e-05, "loss": 1.4221, "step": 5134 }, { "epoch": 0.7355679702048417, "grad_norm": 1.0253523588180542, "learning_rate": 3.449161531144447e-05, "loss": 1.3562, "step": 5135 }, { "epoch": 0.7357112161581435, "grad_norm": 1.041393518447876, "learning_rate": 3.4456566245566715e-05, "loss": 1.4077, "step": 5136 }, { "epoch": 0.7358544621114453, "grad_norm": 0.9283429980278015, "learning_rate": 3.4421531289568324e-05, "loss": 1.4198, "step": 5137 }, { "epoch": 0.7359977080647472, "grad_norm": 1.090798258781433, "learning_rate": 3.438651045099137e-05, "loss": 1.4804, "step": 5138 }, { "epoch": 0.736140954018049, "grad_norm": 1.0441683530807495, "learning_rate": 3.4351503737375065e-05, "loss": 1.5021, "step": 5139 }, { "epoch": 0.7362841999713509, "grad_norm": 1.1803879737854004, "learning_rate": 3.4316511156255494e-05, "loss": 1.187, "step": 5140 }, { "epoch": 0.7364274459246526, "grad_norm": 1.0683974027633667, "learning_rate": 3.428153271516562e-05, "loss": 1.2241, "step": 5141 }, { "epoch": 0.7365706918779544, "grad_norm": 1.0174381732940674, "learning_rate": 3.424656842163545e-05, "loss": 1.2744, "step": 5142 }, { "epoch": 0.7367139378312563, "grad_norm": 0.9443196654319763, "learning_rate": 3.4211618283192014e-05, "loss": 1.3846, "step": 5143 }, { "epoch": 0.7368571837845581, "grad_norm": 0.9626579284667969, "learning_rate": 3.417668230735912e-05, "loss": 1.343, "step": 5144 }, { "epoch": 0.73700042973786, "grad_norm": 1.0951868295669556, "learning_rate": 3.414176050165765e-05, "loss": 1.4742, "step": 5145 }, { "epoch": 0.7371436756911617, "grad_norm": 1.2620041370391846, "learning_rate": 3.41068528736054e-05, "loss": 1.5067, "step": 5146 }, { "epoch": 0.7372869216444635, "grad_norm": 1.128720760345459, "learning_rate": 3.4071959430717124e-05, "loss": 1.3316, "step": 5147 }, { "epoch": 0.7374301675977654, "grad_norm": 1.0559282302856445, "learning_rate": 3.403708018050456e-05, "loss": 1.127, "step": 5148 }, { "epoch": 0.7375734135510672, "grad_norm": 1.3836275339126587, "learning_rate": 3.4002215130476236e-05, "loss": 1.2041, "step": 5149 }, { "epoch": 0.737716659504369, "grad_norm": 1.0276132822036743, "learning_rate": 3.3967364288137824e-05, "loss": 1.2263, "step": 5150 }, { "epoch": 0.7378599054576708, "grad_norm": 1.2269060611724854, "learning_rate": 3.393252766099187e-05, "loss": 1.4776, "step": 5151 }, { "epoch": 0.7380031514109726, "grad_norm": 1.0339391231536865, "learning_rate": 3.389770525653777e-05, "loss": 1.3279, "step": 5152 }, { "epoch": 0.7381463973642745, "grad_norm": 1.130679965019226, "learning_rate": 3.386289708227194e-05, "loss": 1.6257, "step": 5153 }, { "epoch": 0.7382896433175763, "grad_norm": 1.0850118398666382, "learning_rate": 3.3828103145687726e-05, "loss": 1.286, "step": 5154 }, { "epoch": 0.7384328892708781, "grad_norm": 1.1260206699371338, "learning_rate": 3.379332345427541e-05, "loss": 1.3295, "step": 5155 }, { "epoch": 0.7385761352241799, "grad_norm": 1.1195250749588013, "learning_rate": 3.375855801552219e-05, "loss": 1.4368, "step": 5156 }, { "epoch": 0.7387193811774817, "grad_norm": 0.9693967700004578, "learning_rate": 3.372380683691221e-05, "loss": 1.2948, "step": 5157 }, { "epoch": 0.7388626271307835, "grad_norm": 0.9901983737945557, "learning_rate": 3.368906992592656e-05, "loss": 1.4092, "step": 5158 }, { "epoch": 0.7390058730840854, "grad_norm": 1.116674542427063, "learning_rate": 3.36543472900432e-05, "loss": 1.3243, "step": 5159 }, { "epoch": 0.7391491190373872, "grad_norm": 0.9755721688270569, "learning_rate": 3.361963893673703e-05, "loss": 1.4994, "step": 5160 }, { "epoch": 0.7392923649906891, "grad_norm": 1.034799575805664, "learning_rate": 3.358494487348e-05, "loss": 1.311, "step": 5161 }, { "epoch": 0.7394356109439908, "grad_norm": 0.9473602175712585, "learning_rate": 3.355026510774079e-05, "loss": 1.4483, "step": 5162 }, { "epoch": 0.7395788568972926, "grad_norm": 1.32988440990448, "learning_rate": 3.3515599646985116e-05, "loss": 1.3638, "step": 5163 }, { "epoch": 0.7397221028505945, "grad_norm": 0.9939714670181274, "learning_rate": 3.3480948498675616e-05, "loss": 1.2631, "step": 5164 }, { "epoch": 0.7398653488038963, "grad_norm": 1.0577690601348877, "learning_rate": 3.344631167027181e-05, "loss": 1.3235, "step": 5165 }, { "epoch": 0.7400085947571982, "grad_norm": 1.0976585149765015, "learning_rate": 3.341168916923019e-05, "loss": 1.4051, "step": 5166 }, { "epoch": 0.7401518407104999, "grad_norm": 1.1382545232772827, "learning_rate": 3.3377081003004017e-05, "loss": 1.468, "step": 5167 }, { "epoch": 0.7402950866638017, "grad_norm": 1.0567196607589722, "learning_rate": 3.334248717904368e-05, "loss": 1.3457, "step": 5168 }, { "epoch": 0.7404383326171036, "grad_norm": 1.3578484058380127, "learning_rate": 3.330790770479636e-05, "loss": 1.3705, "step": 5169 }, { "epoch": 0.7405815785704054, "grad_norm": 1.237394094467163, "learning_rate": 3.327334258770611e-05, "loss": 1.4007, "step": 5170 }, { "epoch": 0.7407248245237072, "grad_norm": 1.1391876935958862, "learning_rate": 3.3238791835213944e-05, "loss": 1.4434, "step": 5171 }, { "epoch": 0.740868070477009, "grad_norm": 0.958648145198822, "learning_rate": 3.320425545475787e-05, "loss": 1.6004, "step": 5172 }, { "epoch": 0.7410113164303108, "grad_norm": 1.034090518951416, "learning_rate": 3.316973345377263e-05, "loss": 1.4909, "step": 5173 }, { "epoch": 0.7411545623836127, "grad_norm": 0.9962390065193176, "learning_rate": 3.313522583969e-05, "loss": 1.501, "step": 5174 }, { "epoch": 0.7412978083369145, "grad_norm": 1.2294760942459106, "learning_rate": 3.3100732619938576e-05, "loss": 1.3623, "step": 5175 }, { "epoch": 0.7414410542902163, "grad_norm": 1.0550763607025146, "learning_rate": 3.306625380194394e-05, "loss": 1.3483, "step": 5176 }, { "epoch": 0.7415843002435181, "grad_norm": 0.9557855129241943, "learning_rate": 3.303178939312849e-05, "loss": 1.6285, "step": 5177 }, { "epoch": 0.7417275461968199, "grad_norm": 1.0615184307098389, "learning_rate": 3.299733940091159e-05, "loss": 1.2086, "step": 5178 }, { "epoch": 0.7418707921501217, "grad_norm": 1.1278741359710693, "learning_rate": 3.296290383270948e-05, "loss": 1.4017, "step": 5179 }, { "epoch": 0.7420140381034236, "grad_norm": 1.0902199745178223, "learning_rate": 3.292848269593524e-05, "loss": 1.287, "step": 5180 }, { "epoch": 0.7421572840567254, "grad_norm": 0.8778855800628662, "learning_rate": 3.2894075997998876e-05, "loss": 1.4942, "step": 5181 }, { "epoch": 0.7423005300100273, "grad_norm": 1.0814718008041382, "learning_rate": 3.28596837463074e-05, "loss": 1.5752, "step": 5182 }, { "epoch": 0.742443775963329, "grad_norm": 1.0368082523345947, "learning_rate": 3.282530594826452e-05, "loss": 1.4614, "step": 5183 }, { "epoch": 0.7425870219166308, "grad_norm": 1.0122549533843994, "learning_rate": 3.2790942611270955e-05, "loss": 1.4966, "step": 5184 }, { "epoch": 0.7427302678699327, "grad_norm": 1.2061089277267456, "learning_rate": 3.2756593742724274e-05, "loss": 1.4116, "step": 5185 }, { "epoch": 0.7428735138232345, "grad_norm": 1.0547289848327637, "learning_rate": 3.272225935001895e-05, "loss": 1.4862, "step": 5186 }, { "epoch": 0.7430167597765364, "grad_norm": 1.1018575429916382, "learning_rate": 3.268793944054636e-05, "loss": 1.4485, "step": 5187 }, { "epoch": 0.7431600057298381, "grad_norm": 1.1554844379425049, "learning_rate": 3.265363402169461e-05, "loss": 1.3146, "step": 5188 }, { "epoch": 0.7433032516831399, "grad_norm": 1.0814515352249146, "learning_rate": 3.261934310084894e-05, "loss": 1.4899, "step": 5189 }, { "epoch": 0.7434464976364418, "grad_norm": 0.9772927761077881, "learning_rate": 3.258506668539132e-05, "loss": 1.4447, "step": 5190 }, { "epoch": 0.7435897435897436, "grad_norm": 0.9453019499778748, "learning_rate": 3.255080478270054e-05, "loss": 1.3508, "step": 5191 }, { "epoch": 0.7437329895430455, "grad_norm": 1.1947277784347534, "learning_rate": 3.251655740015236e-05, "loss": 1.7345, "step": 5192 }, { "epoch": 0.7438762354963472, "grad_norm": 1.1774908304214478, "learning_rate": 3.248232454511947e-05, "loss": 1.4239, "step": 5193 }, { "epoch": 0.744019481449649, "grad_norm": 1.0628496408462524, "learning_rate": 3.2448106224971275e-05, "loss": 1.4381, "step": 5194 }, { "epoch": 0.7441627274029509, "grad_norm": 1.1268256902694702, "learning_rate": 3.2413902447074164e-05, "loss": 1.4024, "step": 5195 }, { "epoch": 0.7443059733562527, "grad_norm": 0.9839563369750977, "learning_rate": 3.2379713218791355e-05, "loss": 1.2616, "step": 5196 }, { "epoch": 0.7444492193095545, "grad_norm": 1.020828127861023, "learning_rate": 3.2345538547482945e-05, "loss": 1.2967, "step": 5197 }, { "epoch": 0.7445924652628563, "grad_norm": 1.2020199298858643, "learning_rate": 3.231137844050593e-05, "loss": 1.4465, "step": 5198 }, { "epoch": 0.7447357112161581, "grad_norm": 1.198256254196167, "learning_rate": 3.227723290521405e-05, "loss": 1.5287, "step": 5199 }, { "epoch": 0.74487895716946, "grad_norm": 1.0634926557540894, "learning_rate": 3.224310194895807e-05, "loss": 1.3536, "step": 5200 }, { "epoch": 0.7450222031227618, "grad_norm": 1.2458806037902832, "learning_rate": 3.220898557908555e-05, "loss": 1.4251, "step": 5201 }, { "epoch": 0.7451654490760636, "grad_norm": 1.0965760946273804, "learning_rate": 3.217488380294083e-05, "loss": 1.4223, "step": 5202 }, { "epoch": 0.7453086950293655, "grad_norm": 1.2408809661865234, "learning_rate": 3.214079662786523e-05, "loss": 1.4341, "step": 5203 }, { "epoch": 0.7454519409826672, "grad_norm": 0.9288257956504822, "learning_rate": 3.210672406119686e-05, "loss": 1.4318, "step": 5204 }, { "epoch": 0.745595186935969, "grad_norm": 1.2427265644073486, "learning_rate": 3.207266611027069e-05, "loss": 1.3204, "step": 5205 }, { "epoch": 0.7457384328892709, "grad_norm": 1.1435372829437256, "learning_rate": 3.203862278241857e-05, "loss": 1.329, "step": 5206 }, { "epoch": 0.7458816788425727, "grad_norm": 0.9676604866981506, "learning_rate": 3.200459408496919e-05, "loss": 1.4805, "step": 5207 }, { "epoch": 0.7460249247958746, "grad_norm": 1.1434203386306763, "learning_rate": 3.197058002524811e-05, "loss": 1.2603, "step": 5208 }, { "epoch": 0.7461681707491763, "grad_norm": 1.0523406267166138, "learning_rate": 3.1936580610577636e-05, "loss": 1.4589, "step": 5209 }, { "epoch": 0.7463114167024781, "grad_norm": 1.3663837909698486, "learning_rate": 3.190259584827704e-05, "loss": 1.359, "step": 5210 }, { "epoch": 0.74645466265578, "grad_norm": 1.2006990909576416, "learning_rate": 3.186862574566245e-05, "loss": 1.4907, "step": 5211 }, { "epoch": 0.7465979086090818, "grad_norm": 1.1332813501358032, "learning_rate": 3.1834670310046734e-05, "loss": 1.4201, "step": 5212 }, { "epoch": 0.7467411545623837, "grad_norm": 1.0703095197677612, "learning_rate": 3.180072954873966e-05, "loss": 1.3224, "step": 5213 }, { "epoch": 0.7468844005156854, "grad_norm": 1.0745210647583008, "learning_rate": 3.1766803469047846e-05, "loss": 1.3616, "step": 5214 }, { "epoch": 0.7470276464689872, "grad_norm": 1.0645780563354492, "learning_rate": 3.1732892078274735e-05, "loss": 1.4676, "step": 5215 }, { "epoch": 0.7471708924222891, "grad_norm": 1.211129903793335, "learning_rate": 3.1698995383720645e-05, "loss": 1.4181, "step": 5216 }, { "epoch": 0.7473141383755909, "grad_norm": 1.1194862127304077, "learning_rate": 3.166511339268259e-05, "loss": 1.4322, "step": 5217 }, { "epoch": 0.7474573843288927, "grad_norm": 1.154075026512146, "learning_rate": 3.163124611245464e-05, "loss": 1.4809, "step": 5218 }, { "epoch": 0.7476006302821945, "grad_norm": 1.2102794647216797, "learning_rate": 3.1597393550327556e-05, "loss": 1.308, "step": 5219 }, { "epoch": 0.7477438762354963, "grad_norm": 1.0487525463104248, "learning_rate": 3.1563555713588924e-05, "loss": 1.4788, "step": 5220 }, { "epoch": 0.7478871221887982, "grad_norm": 1.1613438129425049, "learning_rate": 3.152973260952315e-05, "loss": 1.5997, "step": 5221 }, { "epoch": 0.7480303681421, "grad_norm": 1.1017931699752808, "learning_rate": 3.149592424541166e-05, "loss": 1.4539, "step": 5222 }, { "epoch": 0.7481736140954018, "grad_norm": 1.0918571949005127, "learning_rate": 3.146213062853243e-05, "loss": 1.4629, "step": 5223 }, { "epoch": 0.7483168600487036, "grad_norm": 1.0165950059890747, "learning_rate": 3.1428351766160415e-05, "loss": 1.3913, "step": 5224 }, { "epoch": 0.7484601060020054, "grad_norm": 0.9308567643165588, "learning_rate": 3.139458766556739e-05, "loss": 1.4879, "step": 5225 }, { "epoch": 0.7486033519553073, "grad_norm": 1.1123707294464111, "learning_rate": 3.136083833402192e-05, "loss": 1.3793, "step": 5226 }, { "epoch": 0.7487465979086091, "grad_norm": 1.0654362440109253, "learning_rate": 3.132710377878942e-05, "loss": 1.5298, "step": 5227 }, { "epoch": 0.7488898438619109, "grad_norm": 0.9506818056106567, "learning_rate": 3.1293384007132035e-05, "loss": 1.6356, "step": 5228 }, { "epoch": 0.7490330898152128, "grad_norm": 1.0375168323516846, "learning_rate": 3.1259679026308875e-05, "loss": 1.3733, "step": 5229 }, { "epoch": 0.7491763357685145, "grad_norm": 1.2448631525039673, "learning_rate": 3.12259888435758e-05, "loss": 1.3093, "step": 5230 }, { "epoch": 0.7493195817218163, "grad_norm": 1.1992628574371338, "learning_rate": 3.11923134661854e-05, "loss": 1.3638, "step": 5231 }, { "epoch": 0.7494628276751182, "grad_norm": 1.0951753854751587, "learning_rate": 3.1158652901387186e-05, "loss": 1.3924, "step": 5232 }, { "epoch": 0.74960607362842, "grad_norm": 0.9657092094421387, "learning_rate": 3.112500715642743e-05, "loss": 1.4188, "step": 5233 }, { "epoch": 0.7497493195817219, "grad_norm": 1.0975433588027954, "learning_rate": 3.1091376238549265e-05, "loss": 1.4857, "step": 5234 }, { "epoch": 0.7498925655350236, "grad_norm": 1.3587557077407837, "learning_rate": 3.105776015499255e-05, "loss": 1.2851, "step": 5235 }, { "epoch": 0.7500358114883254, "grad_norm": 1.4095728397369385, "learning_rate": 3.102415891299403e-05, "loss": 1.2907, "step": 5236 }, { "epoch": 0.7501790574416273, "grad_norm": 1.101123571395874, "learning_rate": 3.099057251978724e-05, "loss": 1.5271, "step": 5237 }, { "epoch": 0.7503223033949291, "grad_norm": 1.1078753471374512, "learning_rate": 3.0957000982602436e-05, "loss": 1.2573, "step": 5238 }, { "epoch": 0.750465549348231, "grad_norm": 1.1320340633392334, "learning_rate": 3.092344430866674e-05, "loss": 1.4062, "step": 5239 }, { "epoch": 0.7506087953015327, "grad_norm": 1.1358118057250977, "learning_rate": 3.088990250520417e-05, "loss": 1.3566, "step": 5240 }, { "epoch": 0.7507520412548345, "grad_norm": 0.9678964614868164, "learning_rate": 3.085637557943535e-05, "loss": 1.4855, "step": 5241 }, { "epoch": 0.7508952872081364, "grad_norm": 0.896219789981842, "learning_rate": 3.082286353857782e-05, "loss": 1.3572, "step": 5242 }, { "epoch": 0.7510385331614382, "grad_norm": 1.0818207263946533, "learning_rate": 3.0789366389845905e-05, "loss": 1.4082, "step": 5243 }, { "epoch": 0.75118177911474, "grad_norm": 1.2900702953338623, "learning_rate": 3.0755884140450705e-05, "loss": 1.3919, "step": 5244 }, { "epoch": 0.7513250250680418, "grad_norm": 0.9471254944801331, "learning_rate": 3.072241679760014e-05, "loss": 1.4226, "step": 5245 }, { "epoch": 0.7514682710213436, "grad_norm": 1.1165205240249634, "learning_rate": 3.068896436849888e-05, "loss": 1.3225, "step": 5246 }, { "epoch": 0.7516115169746455, "grad_norm": 1.321968674659729, "learning_rate": 3.0655526860348396e-05, "loss": 1.3786, "step": 5247 }, { "epoch": 0.7517547629279473, "grad_norm": 1.009322166442871, "learning_rate": 3.062210428034701e-05, "loss": 1.3402, "step": 5248 }, { "epoch": 0.7518980088812491, "grad_norm": 1.2682723999023438, "learning_rate": 3.058869663568967e-05, "loss": 1.393, "step": 5249 }, { "epoch": 0.752041254834551, "grad_norm": 1.3009908199310303, "learning_rate": 3.055530393356831e-05, "loss": 1.4349, "step": 5250 }, { "epoch": 0.7521845007878527, "grad_norm": 1.046252727508545, "learning_rate": 3.0521926181171566e-05, "loss": 1.3119, "step": 5251 }, { "epoch": 0.7523277467411545, "grad_norm": 1.127808928489685, "learning_rate": 3.0488563385684764e-05, "loss": 1.215, "step": 5252 }, { "epoch": 0.7524709926944564, "grad_norm": 1.0358679294586182, "learning_rate": 3.0455215554290128e-05, "loss": 1.2998, "step": 5253 }, { "epoch": 0.7526142386477582, "grad_norm": 1.1149992942810059, "learning_rate": 3.0421882694166602e-05, "loss": 1.5254, "step": 5254 }, { "epoch": 0.7527574846010601, "grad_norm": 1.065881609916687, "learning_rate": 3.038856481248996e-05, "loss": 1.3185, "step": 5255 }, { "epoch": 0.7529007305543618, "grad_norm": 0.9617979526519775, "learning_rate": 3.0355261916432688e-05, "loss": 1.3173, "step": 5256 }, { "epoch": 0.7530439765076636, "grad_norm": 1.0790705680847168, "learning_rate": 3.032197401316409e-05, "loss": 1.3111, "step": 5257 }, { "epoch": 0.7531872224609655, "grad_norm": 1.1235885620117188, "learning_rate": 3.028870110985025e-05, "loss": 1.395, "step": 5258 }, { "epoch": 0.7533304684142673, "grad_norm": 1.0750095844268799, "learning_rate": 3.0255443213653943e-05, "loss": 1.2471, "step": 5259 }, { "epoch": 0.7534737143675692, "grad_norm": 1.0138497352600098, "learning_rate": 3.0222200331734772e-05, "loss": 1.3654, "step": 5260 }, { "epoch": 0.7536169603208709, "grad_norm": 1.2020118236541748, "learning_rate": 3.0188972471249198e-05, "loss": 1.494, "step": 5261 }, { "epoch": 0.7537602062741727, "grad_norm": 1.0309174060821533, "learning_rate": 3.015575963935027e-05, "loss": 1.2139, "step": 5262 }, { "epoch": 0.7539034522274746, "grad_norm": 0.9640597105026245, "learning_rate": 3.0122561843187914e-05, "loss": 1.4568, "step": 5263 }, { "epoch": 0.7540466981807764, "grad_norm": 1.1459474563598633, "learning_rate": 3.0089379089908786e-05, "loss": 1.4002, "step": 5264 }, { "epoch": 0.7541899441340782, "grad_norm": 1.067918062210083, "learning_rate": 3.005621138665633e-05, "loss": 1.5095, "step": 5265 }, { "epoch": 0.75433319008738, "grad_norm": 0.9664956331253052, "learning_rate": 3.0023058740570754e-05, "loss": 1.4564, "step": 5266 }, { "epoch": 0.7544764360406818, "grad_norm": 1.1434638500213623, "learning_rate": 2.9989921158788902e-05, "loss": 1.4323, "step": 5267 }, { "epoch": 0.7546196819939837, "grad_norm": 1.0754722356796265, "learning_rate": 2.9956798648444584e-05, "loss": 1.307, "step": 5268 }, { "epoch": 0.7547629279472855, "grad_norm": 1.0213178396224976, "learning_rate": 2.9923691216668238e-05, "loss": 1.5007, "step": 5269 }, { "epoch": 0.7549061739005873, "grad_norm": 1.0263917446136475, "learning_rate": 2.989059887058703e-05, "loss": 1.4063, "step": 5270 }, { "epoch": 0.7550494198538892, "grad_norm": 1.573898434638977, "learning_rate": 2.9857521617324914e-05, "loss": 1.3927, "step": 5271 }, { "epoch": 0.7551926658071909, "grad_norm": 1.0526155233383179, "learning_rate": 2.9824459464002697e-05, "loss": 1.2306, "step": 5272 }, { "epoch": 0.7553359117604928, "grad_norm": 0.9442198276519775, "learning_rate": 2.979141241773775e-05, "loss": 1.4535, "step": 5273 }, { "epoch": 0.7554791577137946, "grad_norm": 1.1698716878890991, "learning_rate": 2.9758380485644323e-05, "loss": 1.3061, "step": 5274 }, { "epoch": 0.7556224036670964, "grad_norm": 1.0386513471603394, "learning_rate": 2.9725363674833362e-05, "loss": 1.3918, "step": 5275 }, { "epoch": 0.7557656496203983, "grad_norm": 0.9259045124053955, "learning_rate": 2.9692361992412577e-05, "loss": 1.3563, "step": 5276 }, { "epoch": 0.7559088955737, "grad_norm": 0.883860170841217, "learning_rate": 2.965937544548645e-05, "loss": 1.5507, "step": 5277 }, { "epoch": 0.7560521415270018, "grad_norm": 1.2496848106384277, "learning_rate": 2.9626404041156053e-05, "loss": 1.4985, "step": 5278 }, { "epoch": 0.7561953874803037, "grad_norm": 1.0576503276824951, "learning_rate": 2.9593447786519425e-05, "loss": 1.4358, "step": 5279 }, { "epoch": 0.7563386334336055, "grad_norm": 1.0769339799880981, "learning_rate": 2.956050668867123e-05, "loss": 1.47, "step": 5280 }, { "epoch": 0.7564818793869074, "grad_norm": 1.1733505725860596, "learning_rate": 2.952758075470281e-05, "loss": 1.4854, "step": 5281 }, { "epoch": 0.7566251253402091, "grad_norm": 1.1533260345458984, "learning_rate": 2.949466999170233e-05, "loss": 1.4455, "step": 5282 }, { "epoch": 0.7567683712935109, "grad_norm": 1.0212055444717407, "learning_rate": 2.946177440675466e-05, "loss": 1.4097, "step": 5283 }, { "epoch": 0.7569116172468128, "grad_norm": 1.1643275022506714, "learning_rate": 2.942889400694141e-05, "loss": 1.3051, "step": 5284 }, { "epoch": 0.7570548632001146, "grad_norm": 1.2512731552124023, "learning_rate": 2.9396028799340924e-05, "loss": 1.4109, "step": 5285 }, { "epoch": 0.7571981091534165, "grad_norm": 1.0877684354782104, "learning_rate": 2.9363178791028257e-05, "loss": 1.3648, "step": 5286 }, { "epoch": 0.7573413551067182, "grad_norm": 1.0833747386932373, "learning_rate": 2.9330343989075236e-05, "loss": 1.5236, "step": 5287 }, { "epoch": 0.75748460106002, "grad_norm": 1.006443738937378, "learning_rate": 2.9297524400550325e-05, "loss": 1.3639, "step": 5288 }, { "epoch": 0.7576278470133219, "grad_norm": 1.2001993656158447, "learning_rate": 2.9264720032518756e-05, "loss": 1.34, "step": 5289 }, { "epoch": 0.7577710929666237, "grad_norm": 1.043632984161377, "learning_rate": 2.9231930892042614e-05, "loss": 1.3043, "step": 5290 }, { "epoch": 0.7579143389199255, "grad_norm": 1.0485841035842896, "learning_rate": 2.9199156986180463e-05, "loss": 1.371, "step": 5291 }, { "epoch": 0.7580575848732274, "grad_norm": 1.188919186592102, "learning_rate": 2.9166398321987774e-05, "loss": 1.4428, "step": 5292 }, { "epoch": 0.7582008308265291, "grad_norm": 0.9221444725990295, "learning_rate": 2.9133654906516672e-05, "loss": 1.5253, "step": 5293 }, { "epoch": 0.758344076779831, "grad_norm": 0.9905220866203308, "learning_rate": 2.9100926746815992e-05, "loss": 1.5086, "step": 5294 }, { "epoch": 0.7584873227331328, "grad_norm": 0.9922628998756409, "learning_rate": 2.9068213849931338e-05, "loss": 1.5098, "step": 5295 }, { "epoch": 0.7586305686864346, "grad_norm": 1.106137752532959, "learning_rate": 2.903551622290489e-05, "loss": 1.5416, "step": 5296 }, { "epoch": 0.7587738146397365, "grad_norm": 1.348939299583435, "learning_rate": 2.9002833872775735e-05, "loss": 1.2668, "step": 5297 }, { "epoch": 0.7589170605930382, "grad_norm": 1.1245379447937012, "learning_rate": 2.8970166806579568e-05, "loss": 1.1444, "step": 5298 }, { "epoch": 0.75906030654634, "grad_norm": 1.4945236444473267, "learning_rate": 2.893751503134874e-05, "loss": 1.39, "step": 5299 }, { "epoch": 0.7592035524996419, "grad_norm": 1.1722297668457031, "learning_rate": 2.8904878554112367e-05, "loss": 1.2366, "step": 5300 }, { "epoch": 0.7593467984529437, "grad_norm": 1.0624130964279175, "learning_rate": 2.8872257381896385e-05, "loss": 1.4595, "step": 5301 }, { "epoch": 0.7594900444062456, "grad_norm": 1.006986379623413, "learning_rate": 2.883965152172321e-05, "loss": 1.3608, "step": 5302 }, { "epoch": 0.7596332903595473, "grad_norm": 1.014585018157959, "learning_rate": 2.880706098061211e-05, "loss": 1.4577, "step": 5303 }, { "epoch": 0.7597765363128491, "grad_norm": 0.8617925047874451, "learning_rate": 2.877448576557904e-05, "loss": 1.3414, "step": 5304 }, { "epoch": 0.759919782266151, "grad_norm": 1.217434048652649, "learning_rate": 2.874192588363662e-05, "loss": 1.264, "step": 5305 }, { "epoch": 0.7600630282194528, "grad_norm": 0.9415032267570496, "learning_rate": 2.8709381341794204e-05, "loss": 1.4105, "step": 5306 }, { "epoch": 0.7602062741727547, "grad_norm": 1.0786097049713135, "learning_rate": 2.867685214705781e-05, "loss": 1.4373, "step": 5307 }, { "epoch": 0.7603495201260564, "grad_norm": 1.1815820932388306, "learning_rate": 2.8644338306430208e-05, "loss": 1.3294, "step": 5308 }, { "epoch": 0.7604927660793582, "grad_norm": 1.1950478553771973, "learning_rate": 2.8611839826910757e-05, "loss": 1.3058, "step": 5309 }, { "epoch": 0.7606360120326601, "grad_norm": 1.2802622318267822, "learning_rate": 2.8579356715495577e-05, "loss": 1.4427, "step": 5310 }, { "epoch": 0.7607792579859619, "grad_norm": 1.1187304258346558, "learning_rate": 2.8546888979177578e-05, "loss": 1.5523, "step": 5311 }, { "epoch": 0.7609225039392638, "grad_norm": 1.20724356174469, "learning_rate": 2.851443662494615e-05, "loss": 1.3855, "step": 5312 }, { "epoch": 0.7610657498925655, "grad_norm": 1.4193682670593262, "learning_rate": 2.848199965978753e-05, "loss": 1.4285, "step": 5313 }, { "epoch": 0.7612089958458673, "grad_norm": 1.416662335395813, "learning_rate": 2.8449578090684593e-05, "loss": 1.4097, "step": 5314 }, { "epoch": 0.7613522417991692, "grad_norm": 1.062666654586792, "learning_rate": 2.841717192461688e-05, "loss": 1.3727, "step": 5315 }, { "epoch": 0.761495487752471, "grad_norm": 0.9586269855499268, "learning_rate": 2.8384781168560693e-05, "loss": 1.4306, "step": 5316 }, { "epoch": 0.7616387337057728, "grad_norm": 1.1697758436203003, "learning_rate": 2.835240582948886e-05, "loss": 1.357, "step": 5317 }, { "epoch": 0.7617819796590747, "grad_norm": 1.032918095588684, "learning_rate": 2.8320045914371074e-05, "loss": 1.5393, "step": 5318 }, { "epoch": 0.7619252256123764, "grad_norm": 1.2699965238571167, "learning_rate": 2.828770143017363e-05, "loss": 1.1875, "step": 5319 }, { "epoch": 0.7620684715656783, "grad_norm": 1.2660740613937378, "learning_rate": 2.8255372383859435e-05, "loss": 1.4097, "step": 5320 }, { "epoch": 0.7622117175189801, "grad_norm": 1.082439661026001, "learning_rate": 2.8223058782388134e-05, "loss": 1.5422, "step": 5321 }, { "epoch": 0.7623549634722819, "grad_norm": 1.01420259475708, "learning_rate": 2.8190760632716127e-05, "loss": 1.4008, "step": 5322 }, { "epoch": 0.7624982094255838, "grad_norm": 1.111799955368042, "learning_rate": 2.8158477941796336e-05, "loss": 1.337, "step": 5323 }, { "epoch": 0.7626414553788855, "grad_norm": 1.2761080265045166, "learning_rate": 2.8126210716578427e-05, "loss": 1.2787, "step": 5324 }, { "epoch": 0.7627847013321873, "grad_norm": 1.1700340509414673, "learning_rate": 2.809395896400876e-05, "loss": 1.341, "step": 5325 }, { "epoch": 0.7629279472854892, "grad_norm": 1.0529197454452515, "learning_rate": 2.8061722691030335e-05, "loss": 1.2499, "step": 5326 }, { "epoch": 0.763071193238791, "grad_norm": 1.132962703704834, "learning_rate": 2.8029501904582835e-05, "loss": 1.4047, "step": 5327 }, { "epoch": 0.7632144391920929, "grad_norm": 1.0735987424850464, "learning_rate": 2.799729661160253e-05, "loss": 1.4283, "step": 5328 }, { "epoch": 0.7633576851453946, "grad_norm": 1.1575231552124023, "learning_rate": 2.7965106819022504e-05, "loss": 1.5334, "step": 5329 }, { "epoch": 0.7635009310986964, "grad_norm": 1.0861072540283203, "learning_rate": 2.7932932533772417e-05, "loss": 1.5401, "step": 5330 }, { "epoch": 0.7636441770519983, "grad_norm": 1.032377004623413, "learning_rate": 2.790077376277854e-05, "loss": 1.3929, "step": 5331 }, { "epoch": 0.7637874230053001, "grad_norm": 1.1348316669464111, "learning_rate": 2.786863051296391e-05, "loss": 1.4727, "step": 5332 }, { "epoch": 0.763930668958602, "grad_norm": 1.0201689004898071, "learning_rate": 2.7836502791248142e-05, "loss": 1.5093, "step": 5333 }, { "epoch": 0.7640739149119037, "grad_norm": 1.0382293462753296, "learning_rate": 2.7804390604547557e-05, "loss": 1.4964, "step": 5334 }, { "epoch": 0.7642171608652055, "grad_norm": 1.093854546546936, "learning_rate": 2.777229395977511e-05, "loss": 1.4272, "step": 5335 }, { "epoch": 0.7643604068185074, "grad_norm": 1.0585594177246094, "learning_rate": 2.774021286384042e-05, "loss": 1.4228, "step": 5336 }, { "epoch": 0.7645036527718092, "grad_norm": 1.0755082368850708, "learning_rate": 2.770814732364978e-05, "loss": 1.3671, "step": 5337 }, { "epoch": 0.764646898725111, "grad_norm": 1.0224575996398926, "learning_rate": 2.7676097346106034e-05, "loss": 1.3841, "step": 5338 }, { "epoch": 0.7647901446784129, "grad_norm": 1.1585297584533691, "learning_rate": 2.7644062938108774e-05, "loss": 1.2674, "step": 5339 }, { "epoch": 0.7649333906317146, "grad_norm": 0.9601103067398071, "learning_rate": 2.761204410655428e-05, "loss": 1.5596, "step": 5340 }, { "epoch": 0.7650766365850165, "grad_norm": 1.2528530359268188, "learning_rate": 2.7580040858335345e-05, "loss": 1.3788, "step": 5341 }, { "epoch": 0.7652198825383183, "grad_norm": 1.1307536363601685, "learning_rate": 2.7548053200341496e-05, "loss": 1.3003, "step": 5342 }, { "epoch": 0.7653631284916201, "grad_norm": 1.1641361713409424, "learning_rate": 2.7516081139458883e-05, "loss": 1.5079, "step": 5343 }, { "epoch": 0.765506374444922, "grad_norm": 1.0204488039016724, "learning_rate": 2.7484124682570302e-05, "loss": 1.4381, "step": 5344 }, { "epoch": 0.7656496203982237, "grad_norm": 1.0582654476165771, "learning_rate": 2.7452183836555212e-05, "loss": 1.3036, "step": 5345 }, { "epoch": 0.7657928663515255, "grad_norm": 1.0235873460769653, "learning_rate": 2.7420258608289607e-05, "loss": 1.3728, "step": 5346 }, { "epoch": 0.7659361123048274, "grad_norm": 1.0706382989883423, "learning_rate": 2.7388349004646285e-05, "loss": 1.3289, "step": 5347 }, { "epoch": 0.7660793582581292, "grad_norm": 1.0654786825180054, "learning_rate": 2.7356455032494598e-05, "loss": 1.373, "step": 5348 }, { "epoch": 0.7662226042114311, "grad_norm": 1.123969316482544, "learning_rate": 2.7324576698700453e-05, "loss": 1.4721, "step": 5349 }, { "epoch": 0.7663658501647328, "grad_norm": 1.0574196577072144, "learning_rate": 2.7292714010126484e-05, "loss": 1.4121, "step": 5350 }, { "epoch": 0.7665090961180346, "grad_norm": 1.080439805984497, "learning_rate": 2.7260866973632025e-05, "loss": 1.2691, "step": 5351 }, { "epoch": 0.7666523420713365, "grad_norm": 1.0477899312973022, "learning_rate": 2.722903559607286e-05, "loss": 1.4546, "step": 5352 }, { "epoch": 0.7667955880246383, "grad_norm": 1.2967371940612793, "learning_rate": 2.719721988430153e-05, "loss": 1.2958, "step": 5353 }, { "epoch": 0.7669388339779402, "grad_norm": 1.31163489818573, "learning_rate": 2.7165419845167172e-05, "loss": 1.5292, "step": 5354 }, { "epoch": 0.7670820799312419, "grad_norm": 0.9819673299789429, "learning_rate": 2.713363548551554e-05, "loss": 1.4179, "step": 5355 }, { "epoch": 0.7672253258845437, "grad_norm": 1.3790018558502197, "learning_rate": 2.7101866812189057e-05, "loss": 1.248, "step": 5356 }, { "epoch": 0.7673685718378456, "grad_norm": 1.0061616897583008, "learning_rate": 2.7070113832026643e-05, "loss": 1.4502, "step": 5357 }, { "epoch": 0.7675118177911474, "grad_norm": 1.0803977251052856, "learning_rate": 2.7038376551864008e-05, "loss": 1.498, "step": 5358 }, { "epoch": 0.7676550637444493, "grad_norm": 1.0983235836029053, "learning_rate": 2.7006654978533417e-05, "loss": 1.4305, "step": 5359 }, { "epoch": 0.7677983096977511, "grad_norm": 1.0291062593460083, "learning_rate": 2.697494911886368e-05, "loss": 1.3164, "step": 5360 }, { "epoch": 0.7679415556510528, "grad_norm": 1.154427170753479, "learning_rate": 2.6943258979680308e-05, "loss": 1.4109, "step": 5361 }, { "epoch": 0.7680848016043547, "grad_norm": 1.14201021194458, "learning_rate": 2.6911584567805393e-05, "loss": 1.5377, "step": 5362 }, { "epoch": 0.7682280475576565, "grad_norm": 1.0191935300827026, "learning_rate": 2.687992589005768e-05, "loss": 1.3696, "step": 5363 }, { "epoch": 0.7683712935109583, "grad_norm": 1.2176258563995361, "learning_rate": 2.6848282953252467e-05, "loss": 1.5087, "step": 5364 }, { "epoch": 0.7685145394642602, "grad_norm": 1.2263424396514893, "learning_rate": 2.6816655764201714e-05, "loss": 1.3312, "step": 5365 }, { "epoch": 0.7686577854175619, "grad_norm": 0.982971727848053, "learning_rate": 2.6785044329714004e-05, "loss": 1.4073, "step": 5366 }, { "epoch": 0.7688010313708638, "grad_norm": 1.1179509162902832, "learning_rate": 2.6753448656594393e-05, "loss": 1.2986, "step": 5367 }, { "epoch": 0.7689442773241656, "grad_norm": 1.0811750888824463, "learning_rate": 2.672186875164475e-05, "loss": 1.3693, "step": 5368 }, { "epoch": 0.7690875232774674, "grad_norm": 0.9156041145324707, "learning_rate": 2.6690304621663442e-05, "loss": 1.6285, "step": 5369 }, { "epoch": 0.7692307692307693, "grad_norm": 1.072241187095642, "learning_rate": 2.6658756273445386e-05, "loss": 1.4609, "step": 5370 }, { "epoch": 0.769374015184071, "grad_norm": 1.5767362117767334, "learning_rate": 2.6627223713782157e-05, "loss": 1.3948, "step": 5371 }, { "epoch": 0.7695172611373728, "grad_norm": 1.1711581945419312, "learning_rate": 2.659570694946203e-05, "loss": 1.4621, "step": 5372 }, { "epoch": 0.7696605070906747, "grad_norm": 1.2863517999649048, "learning_rate": 2.6564205987269696e-05, "loss": 1.3688, "step": 5373 }, { "epoch": 0.7698037530439765, "grad_norm": 1.351944923400879, "learning_rate": 2.653272083398656e-05, "loss": 1.3103, "step": 5374 }, { "epoch": 0.7699469989972784, "grad_norm": 1.1083176136016846, "learning_rate": 2.650125149639059e-05, "loss": 1.1606, "step": 5375 }, { "epoch": 0.7700902449505801, "grad_norm": 1.153162956237793, "learning_rate": 2.646979798125636e-05, "loss": 1.5447, "step": 5376 }, { "epoch": 0.7702334909038819, "grad_norm": 1.1354740858078003, "learning_rate": 2.643836029535507e-05, "loss": 1.5613, "step": 5377 }, { "epoch": 0.7703767368571838, "grad_norm": 1.1255152225494385, "learning_rate": 2.6406938445454376e-05, "loss": 1.3382, "step": 5378 }, { "epoch": 0.7705199828104856, "grad_norm": 1.068695306777954, "learning_rate": 2.6375532438318716e-05, "loss": 1.4676, "step": 5379 }, { "epoch": 0.7706632287637875, "grad_norm": 0.9634365439414978, "learning_rate": 2.634414228070904e-05, "loss": 1.4151, "step": 5380 }, { "epoch": 0.7708064747170893, "grad_norm": 1.0725656747817993, "learning_rate": 2.631276797938279e-05, "loss": 1.4625, "step": 5381 }, { "epoch": 0.770949720670391, "grad_norm": 1.060855507850647, "learning_rate": 2.6281409541094127e-05, "loss": 1.3603, "step": 5382 }, { "epoch": 0.7710929666236929, "grad_norm": 1.0964360237121582, "learning_rate": 2.6250066972593735e-05, "loss": 1.3861, "step": 5383 }, { "epoch": 0.7712362125769947, "grad_norm": 1.2195426225662231, "learning_rate": 2.6218740280628896e-05, "loss": 1.4804, "step": 5384 }, { "epoch": 0.7713794585302965, "grad_norm": 0.9624045491218567, "learning_rate": 2.618742947194347e-05, "loss": 1.4278, "step": 5385 }, { "epoch": 0.7715227044835984, "grad_norm": 1.1158347129821777, "learning_rate": 2.615613455327791e-05, "loss": 1.6594, "step": 5386 }, { "epoch": 0.7716659504369001, "grad_norm": 1.26571524143219, "learning_rate": 2.612485553136925e-05, "loss": 1.3648, "step": 5387 }, { "epoch": 0.771809196390202, "grad_norm": 1.1605147123336792, "learning_rate": 2.609359241295104e-05, "loss": 1.4667, "step": 5388 }, { "epoch": 0.7719524423435038, "grad_norm": 1.053276538848877, "learning_rate": 2.6062345204753457e-05, "loss": 1.2711, "step": 5389 }, { "epoch": 0.7720956882968056, "grad_norm": 0.9543305039405823, "learning_rate": 2.6031113913503337e-05, "loss": 1.514, "step": 5390 }, { "epoch": 0.7722389342501075, "grad_norm": 1.0924980640411377, "learning_rate": 2.5999898545923908e-05, "loss": 1.4086, "step": 5391 }, { "epoch": 0.7723821802034092, "grad_norm": 1.0135517120361328, "learning_rate": 2.596869910873512e-05, "loss": 1.4942, "step": 5392 }, { "epoch": 0.772525426156711, "grad_norm": 0.9889259338378906, "learning_rate": 2.5937515608653408e-05, "loss": 1.3339, "step": 5393 }, { "epoch": 0.7726686721100129, "grad_norm": 1.1338982582092285, "learning_rate": 2.5906348052391828e-05, "loss": 1.5144, "step": 5394 }, { "epoch": 0.7728119180633147, "grad_norm": 0.9133114814758301, "learning_rate": 2.587519644666001e-05, "loss": 1.3932, "step": 5395 }, { "epoch": 0.7729551640166166, "grad_norm": 1.1685774326324463, "learning_rate": 2.5844060798164038e-05, "loss": 1.3839, "step": 5396 }, { "epoch": 0.7730984099699183, "grad_norm": 1.1761701107025146, "learning_rate": 2.5812941113606726e-05, "loss": 1.388, "step": 5397 }, { "epoch": 0.7732416559232201, "grad_norm": 1.1376409530639648, "learning_rate": 2.578183739968738e-05, "loss": 1.3842, "step": 5398 }, { "epoch": 0.773384901876522, "grad_norm": 1.0157896280288696, "learning_rate": 2.575074966310179e-05, "loss": 1.3521, "step": 5399 }, { "epoch": 0.7735281478298238, "grad_norm": 1.0421208143234253, "learning_rate": 2.5719677910542394e-05, "loss": 1.4851, "step": 5400 }, { "epoch": 0.7736713937831257, "grad_norm": 1.0282460451126099, "learning_rate": 2.568862214869825e-05, "loss": 1.212, "step": 5401 }, { "epoch": 0.7738146397364275, "grad_norm": 1.0647400617599487, "learning_rate": 2.5657582384254796e-05, "loss": 1.4748, "step": 5402 }, { "epoch": 0.7739578856897292, "grad_norm": 1.051937222480774, "learning_rate": 2.562655862389418e-05, "loss": 1.4836, "step": 5403 }, { "epoch": 0.7741011316430311, "grad_norm": 1.0603488683700562, "learning_rate": 2.5595550874295027e-05, "loss": 1.6119, "step": 5404 }, { "epoch": 0.7742443775963329, "grad_norm": 1.0364676713943481, "learning_rate": 2.556455914213255e-05, "loss": 1.3752, "step": 5405 }, { "epoch": 0.7743876235496348, "grad_norm": 0.9507237672805786, "learning_rate": 2.5533583434078523e-05, "loss": 1.0841, "step": 5406 }, { "epoch": 0.7745308695029366, "grad_norm": 1.0436803102493286, "learning_rate": 2.550262375680117e-05, "loss": 1.1878, "step": 5407 }, { "epoch": 0.7746741154562383, "grad_norm": 1.3421522378921509, "learning_rate": 2.5471680116965425e-05, "loss": 1.3296, "step": 5408 }, { "epoch": 0.7748173614095402, "grad_norm": 0.9931652545928955, "learning_rate": 2.5440752521232692e-05, "loss": 1.3149, "step": 5409 }, { "epoch": 0.774960607362842, "grad_norm": 1.006361722946167, "learning_rate": 2.5409840976260855e-05, "loss": 1.4635, "step": 5410 }, { "epoch": 0.7751038533161438, "grad_norm": 1.1905099153518677, "learning_rate": 2.5378945488704443e-05, "loss": 1.5726, "step": 5411 }, { "epoch": 0.7752470992694457, "grad_norm": 1.0121513605117798, "learning_rate": 2.5348066065214482e-05, "loss": 1.4586, "step": 5412 }, { "epoch": 0.7753903452227474, "grad_norm": 1.0191130638122559, "learning_rate": 2.5317202712438535e-05, "loss": 1.255, "step": 5413 }, { "epoch": 0.7755335911760493, "grad_norm": 1.1988415718078613, "learning_rate": 2.5286355437020746e-05, "loss": 1.3644, "step": 5414 }, { "epoch": 0.7756768371293511, "grad_norm": 1.2554223537445068, "learning_rate": 2.5255524245601748e-05, "loss": 1.4097, "step": 5415 }, { "epoch": 0.7758200830826529, "grad_norm": 0.9802923798561096, "learning_rate": 2.5224709144818782e-05, "loss": 1.4447, "step": 5416 }, { "epoch": 0.7759633290359548, "grad_norm": 1.3789926767349243, "learning_rate": 2.51939101413055e-05, "loss": 1.5304, "step": 5417 }, { "epoch": 0.7761065749892565, "grad_norm": 1.0385291576385498, "learning_rate": 2.5163127241692165e-05, "loss": 1.4261, "step": 5418 }, { "epoch": 0.7762498209425583, "grad_norm": 0.8672406077384949, "learning_rate": 2.5132360452605673e-05, "loss": 1.3072, "step": 5419 }, { "epoch": 0.7763930668958602, "grad_norm": 1.1061148643493652, "learning_rate": 2.5101609780669266e-05, "loss": 1.3123, "step": 5420 }, { "epoch": 0.776536312849162, "grad_norm": 1.088796854019165, "learning_rate": 2.507087523250282e-05, "loss": 1.4215, "step": 5421 }, { "epoch": 0.7766795588024639, "grad_norm": 1.0772571563720703, "learning_rate": 2.5040156814722727e-05, "loss": 1.4445, "step": 5422 }, { "epoch": 0.7768228047557656, "grad_norm": 1.0032541751861572, "learning_rate": 2.50094545339419e-05, "loss": 1.4058, "step": 5423 }, { "epoch": 0.7769660507090674, "grad_norm": 1.3033168315887451, "learning_rate": 2.4978768396769824e-05, "loss": 1.3154, "step": 5424 }, { "epoch": 0.7771092966623693, "grad_norm": 1.084582805633545, "learning_rate": 2.494809840981236e-05, "loss": 1.4181, "step": 5425 }, { "epoch": 0.7772525426156711, "grad_norm": 1.025909423828125, "learning_rate": 2.491744457967209e-05, "loss": 1.4027, "step": 5426 }, { "epoch": 0.777395788568973, "grad_norm": 1.0471577644348145, "learning_rate": 2.4886806912948035e-05, "loss": 1.4609, "step": 5427 }, { "epoch": 0.7775390345222748, "grad_norm": 1.0506747961044312, "learning_rate": 2.4856185416235656e-05, "loss": 1.4481, "step": 5428 }, { "epoch": 0.7776822804755765, "grad_norm": 1.1074923276901245, "learning_rate": 2.4825580096126998e-05, "loss": 1.4442, "step": 5429 }, { "epoch": 0.7778255264288784, "grad_norm": 1.0668188333511353, "learning_rate": 2.479499095921074e-05, "loss": 1.4821, "step": 5430 }, { "epoch": 0.7779687723821802, "grad_norm": 1.0391645431518555, "learning_rate": 2.4764418012071855e-05, "loss": 1.406, "step": 5431 }, { "epoch": 0.778112018335482, "grad_norm": 1.0142494440078735, "learning_rate": 2.473386126129198e-05, "loss": 1.37, "step": 5432 }, { "epoch": 0.7782552642887839, "grad_norm": 0.8885267972946167, "learning_rate": 2.470332071344923e-05, "loss": 1.3655, "step": 5433 }, { "epoch": 0.7783985102420856, "grad_norm": 1.2007206678390503, "learning_rate": 2.4672796375118225e-05, "loss": 1.4813, "step": 5434 }, { "epoch": 0.7785417561953875, "grad_norm": 1.0071890354156494, "learning_rate": 2.4642288252870106e-05, "loss": 1.3395, "step": 5435 }, { "epoch": 0.7786850021486893, "grad_norm": 1.0513888597488403, "learning_rate": 2.461179635327251e-05, "loss": 1.3781, "step": 5436 }, { "epoch": 0.7788282481019911, "grad_norm": 1.2440940141677856, "learning_rate": 2.458132068288962e-05, "loss": 1.3451, "step": 5437 }, { "epoch": 0.778971494055293, "grad_norm": 1.2658860683441162, "learning_rate": 2.4550861248282032e-05, "loss": 1.2586, "step": 5438 }, { "epoch": 0.7791147400085947, "grad_norm": 1.0538208484649658, "learning_rate": 2.4520418056006912e-05, "loss": 1.4797, "step": 5439 }, { "epoch": 0.7792579859618965, "grad_norm": 1.1901886463165283, "learning_rate": 2.4489991112618017e-05, "loss": 1.4614, "step": 5440 }, { "epoch": 0.7794012319151984, "grad_norm": 0.9623347520828247, "learning_rate": 2.4459580424665417e-05, "loss": 1.3402, "step": 5441 }, { "epoch": 0.7795444778685002, "grad_norm": 1.0573067665100098, "learning_rate": 2.4429185998695825e-05, "loss": 1.4963, "step": 5442 }, { "epoch": 0.7796877238218021, "grad_norm": 1.0454970598220825, "learning_rate": 2.43988078412524e-05, "loss": 1.2832, "step": 5443 }, { "epoch": 0.7798309697751038, "grad_norm": 0.9679948687553406, "learning_rate": 2.4368445958874807e-05, "loss": 1.4636, "step": 5444 }, { "epoch": 0.7799742157284056, "grad_norm": 1.2437002658843994, "learning_rate": 2.4338100358099235e-05, "loss": 1.5036, "step": 5445 }, { "epoch": 0.7801174616817075, "grad_norm": 1.1896706819534302, "learning_rate": 2.430777104545826e-05, "loss": 1.346, "step": 5446 }, { "epoch": 0.7802607076350093, "grad_norm": 1.0673450231552124, "learning_rate": 2.4277458027481104e-05, "loss": 1.4702, "step": 5447 }, { "epoch": 0.7804039535883112, "grad_norm": 1.200490117073059, "learning_rate": 2.4247161310693434e-05, "loss": 1.327, "step": 5448 }, { "epoch": 0.780547199541613, "grad_norm": 1.1671743392944336, "learning_rate": 2.4216880901617313e-05, "loss": 1.4504, "step": 5449 }, { "epoch": 0.7806904454949147, "grad_norm": 1.1693484783172607, "learning_rate": 2.4186616806771357e-05, "loss": 1.3291, "step": 5450 }, { "epoch": 0.7808336914482166, "grad_norm": 1.1167196035385132, "learning_rate": 2.415636903267078e-05, "loss": 1.4, "step": 5451 }, { "epoch": 0.7809769374015184, "grad_norm": 1.4478158950805664, "learning_rate": 2.412613758582707e-05, "loss": 1.5248, "step": 5452 }, { "epoch": 0.7811201833548203, "grad_norm": 0.9325673580169678, "learning_rate": 2.4095922472748367e-05, "loss": 1.2945, "step": 5453 }, { "epoch": 0.7812634293081221, "grad_norm": 0.9552785158157349, "learning_rate": 2.4065723699939203e-05, "loss": 1.6263, "step": 5454 }, { "epoch": 0.7814066752614238, "grad_norm": 0.9998942017555237, "learning_rate": 2.4035541273900663e-05, "loss": 1.5621, "step": 5455 }, { "epoch": 0.7815499212147257, "grad_norm": 1.0076566934585571, "learning_rate": 2.4005375201130274e-05, "loss": 1.3258, "step": 5456 }, { "epoch": 0.7816931671680275, "grad_norm": 1.1170464754104614, "learning_rate": 2.3975225488121976e-05, "loss": 1.4238, "step": 5457 }, { "epoch": 0.7818364131213293, "grad_norm": 1.0396312475204468, "learning_rate": 2.3945092141366343e-05, "loss": 1.3998, "step": 5458 }, { "epoch": 0.7819796590746312, "grad_norm": 1.184221863746643, "learning_rate": 2.3914975167350328e-05, "loss": 1.4981, "step": 5459 }, { "epoch": 0.7821229050279329, "grad_norm": 1.058962106704712, "learning_rate": 2.3884874572557316e-05, "loss": 1.4708, "step": 5460 }, { "epoch": 0.7822661509812348, "grad_norm": 1.1053345203399658, "learning_rate": 2.3854790363467262e-05, "loss": 1.3753, "step": 5461 }, { "epoch": 0.7824093969345366, "grad_norm": 1.124083161354065, "learning_rate": 2.3824722546556533e-05, "loss": 1.1979, "step": 5462 }, { "epoch": 0.7825526428878384, "grad_norm": 1.1387380361557007, "learning_rate": 2.3794671128297995e-05, "loss": 1.1768, "step": 5463 }, { "epoch": 0.7826958888411403, "grad_norm": 1.0596282482147217, "learning_rate": 2.3764636115160978e-05, "loss": 1.3623, "step": 5464 }, { "epoch": 0.782839134794442, "grad_norm": 1.2180850505828857, "learning_rate": 2.3734617513611266e-05, "loss": 1.2866, "step": 5465 }, { "epoch": 0.7829823807477438, "grad_norm": 1.1552622318267822, "learning_rate": 2.3704615330111156e-05, "loss": 1.3754, "step": 5466 }, { "epoch": 0.7831256267010457, "grad_norm": 1.1800016164779663, "learning_rate": 2.3674629571119332e-05, "loss": 1.4913, "step": 5467 }, { "epoch": 0.7832688726543475, "grad_norm": 1.2159316539764404, "learning_rate": 2.3644660243090966e-05, "loss": 1.3708, "step": 5468 }, { "epoch": 0.7834121186076494, "grad_norm": 1.1430907249450684, "learning_rate": 2.3614707352477804e-05, "loss": 1.2621, "step": 5469 }, { "epoch": 0.7835553645609512, "grad_norm": 0.9268341064453125, "learning_rate": 2.358477090572787e-05, "loss": 1.3888, "step": 5470 }, { "epoch": 0.7836986105142529, "grad_norm": 1.1056623458862305, "learning_rate": 2.3554850909285786e-05, "loss": 1.3127, "step": 5471 }, { "epoch": 0.7838418564675548, "grad_norm": 1.0710093975067139, "learning_rate": 2.3524947369592575e-05, "loss": 1.1727, "step": 5472 }, { "epoch": 0.7839851024208566, "grad_norm": 1.2467036247253418, "learning_rate": 2.3495060293085735e-05, "loss": 1.4337, "step": 5473 }, { "epoch": 0.7841283483741585, "grad_norm": 1.0214260816574097, "learning_rate": 2.346518968619924e-05, "loss": 1.299, "step": 5474 }, { "epoch": 0.7842715943274603, "grad_norm": 1.049080729484558, "learning_rate": 2.34353355553634e-05, "loss": 1.3697, "step": 5475 }, { "epoch": 0.784414840280762, "grad_norm": 1.2326804399490356, "learning_rate": 2.340549790700517e-05, "loss": 1.5754, "step": 5476 }, { "epoch": 0.7845580862340639, "grad_norm": 1.212562084197998, "learning_rate": 2.337567674754785e-05, "loss": 1.2214, "step": 5477 }, { "epoch": 0.7847013321873657, "grad_norm": 1.1864029169082642, "learning_rate": 2.3345872083411135e-05, "loss": 1.444, "step": 5478 }, { "epoch": 0.7848445781406675, "grad_norm": 1.181270956993103, "learning_rate": 2.3316083921011235e-05, "loss": 1.3331, "step": 5479 }, { "epoch": 0.7849878240939694, "grad_norm": 1.059954047203064, "learning_rate": 2.328631226676089e-05, "loss": 1.2838, "step": 5480 }, { "epoch": 0.7851310700472711, "grad_norm": 1.1604628562927246, "learning_rate": 2.3256557127069124e-05, "loss": 1.3718, "step": 5481 }, { "epoch": 0.785274316000573, "grad_norm": 1.1982344388961792, "learning_rate": 2.3226818508341496e-05, "loss": 1.5401, "step": 5482 }, { "epoch": 0.7854175619538748, "grad_norm": 1.0517563819885254, "learning_rate": 2.3197096416980013e-05, "loss": 1.4108, "step": 5483 }, { "epoch": 0.7855608079071766, "grad_norm": 0.9944028854370117, "learning_rate": 2.3167390859383088e-05, "loss": 1.2913, "step": 5484 }, { "epoch": 0.7857040538604785, "grad_norm": 1.021971344947815, "learning_rate": 2.3137701841945627e-05, "loss": 1.383, "step": 5485 }, { "epoch": 0.7858472998137802, "grad_norm": 1.0083472728729248, "learning_rate": 2.3108029371058848e-05, "loss": 1.3285, "step": 5486 }, { "epoch": 0.785990545767082, "grad_norm": 1.1019871234893799, "learning_rate": 2.30783734531106e-05, "loss": 1.4371, "step": 5487 }, { "epoch": 0.7861337917203839, "grad_norm": 1.0592209100723267, "learning_rate": 2.304873409448506e-05, "loss": 1.2965, "step": 5488 }, { "epoch": 0.7862770376736857, "grad_norm": 1.2254985570907593, "learning_rate": 2.3019111301562802e-05, "loss": 1.3391, "step": 5489 }, { "epoch": 0.7864202836269876, "grad_norm": 0.9758579730987549, "learning_rate": 2.2989505080720886e-05, "loss": 1.2837, "step": 5490 }, { "epoch": 0.7865635295802894, "grad_norm": 0.9957001805305481, "learning_rate": 2.2959915438332823e-05, "loss": 1.2922, "step": 5491 }, { "epoch": 0.7867067755335911, "grad_norm": 1.3815044164657593, "learning_rate": 2.293034238076851e-05, "loss": 1.2947, "step": 5492 }, { "epoch": 0.786850021486893, "grad_norm": 1.1347591876983643, "learning_rate": 2.2900785914394308e-05, "loss": 1.4194, "step": 5493 }, { "epoch": 0.7869932674401948, "grad_norm": 1.013049840927124, "learning_rate": 2.2871246045572993e-05, "loss": 1.4999, "step": 5494 }, { "epoch": 0.7871365133934967, "grad_norm": 1.357895851135254, "learning_rate": 2.2841722780663788e-05, "loss": 1.4436, "step": 5495 }, { "epoch": 0.7872797593467985, "grad_norm": 1.189643383026123, "learning_rate": 2.2812216126022245e-05, "loss": 1.4037, "step": 5496 }, { "epoch": 0.7874230053001002, "grad_norm": 0.9385799765586853, "learning_rate": 2.2782726088000495e-05, "loss": 1.4793, "step": 5497 }, { "epoch": 0.7875662512534021, "grad_norm": 0.9452044367790222, "learning_rate": 2.2753252672947022e-05, "loss": 1.3812, "step": 5498 }, { "epoch": 0.7877094972067039, "grad_norm": 1.0751274824142456, "learning_rate": 2.2723795887206657e-05, "loss": 1.5765, "step": 5499 }, { "epoch": 0.7878527431600058, "grad_norm": 1.1663727760314941, "learning_rate": 2.269435573712071e-05, "loss": 1.4953, "step": 5500 }, { "epoch": 0.7879959891133076, "grad_norm": 0.9630807638168335, "learning_rate": 2.2664932229027024e-05, "loss": 1.414, "step": 5501 }, { "epoch": 0.7881392350666093, "grad_norm": 1.1125819683074951, "learning_rate": 2.2635525369259648e-05, "loss": 1.4275, "step": 5502 }, { "epoch": 0.7882824810199112, "grad_norm": 1.153269648551941, "learning_rate": 2.260613516414919e-05, "loss": 1.4855, "step": 5503 }, { "epoch": 0.788425726973213, "grad_norm": 1.0586724281311035, "learning_rate": 2.2576761620022626e-05, "loss": 1.3624, "step": 5504 }, { "epoch": 0.7885689729265148, "grad_norm": 1.0902937650680542, "learning_rate": 2.254740474320336e-05, "loss": 1.3233, "step": 5505 }, { "epoch": 0.7887122188798167, "grad_norm": 1.1037815809249878, "learning_rate": 2.251806454001123e-05, "loss": 1.2333, "step": 5506 }, { "epoch": 0.7888554648331184, "grad_norm": 1.0841825008392334, "learning_rate": 2.248874101676236e-05, "loss": 1.5312, "step": 5507 }, { "epoch": 0.7889987107864203, "grad_norm": 1.0819047689437866, "learning_rate": 2.245943417976948e-05, "loss": 1.3083, "step": 5508 }, { "epoch": 0.7891419567397221, "grad_norm": 1.2178468704223633, "learning_rate": 2.2430144035341617e-05, "loss": 1.3101, "step": 5509 }, { "epoch": 0.7892852026930239, "grad_norm": 1.0202821493148804, "learning_rate": 2.240087058978415e-05, "loss": 1.3679, "step": 5510 }, { "epoch": 0.7894284486463258, "grad_norm": 1.1028978824615479, "learning_rate": 2.2371613849398975e-05, "loss": 1.4634, "step": 5511 }, { "epoch": 0.7895716945996276, "grad_norm": 1.057798147201538, "learning_rate": 2.234237382048433e-05, "loss": 1.3211, "step": 5512 }, { "epoch": 0.7897149405529293, "grad_norm": 0.9596744775772095, "learning_rate": 2.231315050933488e-05, "loss": 1.3493, "step": 5513 }, { "epoch": 0.7898581865062312, "grad_norm": 1.0085740089416504, "learning_rate": 2.228394392224167e-05, "loss": 1.4692, "step": 5514 }, { "epoch": 0.790001432459533, "grad_norm": 1.0538007020950317, "learning_rate": 2.2254754065492157e-05, "loss": 1.3279, "step": 5515 }, { "epoch": 0.7901446784128349, "grad_norm": 0.9678391814231873, "learning_rate": 2.2225580945370228e-05, "loss": 1.442, "step": 5516 }, { "epoch": 0.7902879243661367, "grad_norm": 1.027978777885437, "learning_rate": 2.2196424568156073e-05, "loss": 1.4822, "step": 5517 }, { "epoch": 0.7904311703194384, "grad_norm": 1.1491197347640991, "learning_rate": 2.2167284940126344e-05, "loss": 1.3251, "step": 5518 }, { "epoch": 0.7905744162727403, "grad_norm": 1.1693757772445679, "learning_rate": 2.2138162067554158e-05, "loss": 1.5452, "step": 5519 }, { "epoch": 0.7907176622260421, "grad_norm": 0.9138875603675842, "learning_rate": 2.210905595670887e-05, "loss": 1.3953, "step": 5520 }, { "epoch": 0.790860908179344, "grad_norm": 1.3093968629837036, "learning_rate": 2.207996661385634e-05, "loss": 1.3597, "step": 5521 }, { "epoch": 0.7910041541326458, "grad_norm": 0.940271258354187, "learning_rate": 2.2050894045258762e-05, "loss": 1.3858, "step": 5522 }, { "epoch": 0.7911474000859475, "grad_norm": 1.0557079315185547, "learning_rate": 2.2021838257174765e-05, "loss": 1.3408, "step": 5523 }, { "epoch": 0.7912906460392494, "grad_norm": 1.1002577543258667, "learning_rate": 2.1992799255859364e-05, "loss": 1.4487, "step": 5524 }, { "epoch": 0.7914338919925512, "grad_norm": 1.179927945137024, "learning_rate": 2.196377704756385e-05, "loss": 1.4006, "step": 5525 }, { "epoch": 0.791577137945853, "grad_norm": 0.9497984051704407, "learning_rate": 2.1934771638536054e-05, "loss": 1.3803, "step": 5526 }, { "epoch": 0.7917203838991549, "grad_norm": 1.2308974266052246, "learning_rate": 2.1905783035020157e-05, "loss": 1.3779, "step": 5527 }, { "epoch": 0.7918636298524566, "grad_norm": 1.0323413610458374, "learning_rate": 2.18768112432566e-05, "loss": 1.5662, "step": 5528 }, { "epoch": 0.7920068758057585, "grad_norm": 0.9781385064125061, "learning_rate": 2.1847856269482303e-05, "loss": 1.4688, "step": 5529 }, { "epoch": 0.7921501217590603, "grad_norm": 1.124023199081421, "learning_rate": 2.1818918119930644e-05, "loss": 1.3386, "step": 5530 }, { "epoch": 0.7922933677123621, "grad_norm": 1.148544192314148, "learning_rate": 2.1789996800831215e-05, "loss": 1.3778, "step": 5531 }, { "epoch": 0.792436613665664, "grad_norm": 1.3279540538787842, "learning_rate": 2.1761092318410072e-05, "loss": 1.3828, "step": 5532 }, { "epoch": 0.7925798596189657, "grad_norm": 0.8857930898666382, "learning_rate": 2.1732204678889632e-05, "loss": 1.3991, "step": 5533 }, { "epoch": 0.7927231055722676, "grad_norm": 0.9784302115440369, "learning_rate": 2.1703333888488708e-05, "loss": 1.3432, "step": 5534 }, { "epoch": 0.7928663515255694, "grad_norm": 1.1189216375350952, "learning_rate": 2.1674479953422477e-05, "loss": 1.3938, "step": 5535 }, { "epoch": 0.7930095974788712, "grad_norm": 1.2653486728668213, "learning_rate": 2.1645642879902406e-05, "loss": 1.2727, "step": 5536 }, { "epoch": 0.7931528434321731, "grad_norm": 1.0303517580032349, "learning_rate": 2.1616822674136473e-05, "loss": 1.3938, "step": 5537 }, { "epoch": 0.7932960893854749, "grad_norm": 0.9577755331993103, "learning_rate": 2.1588019342328968e-05, "loss": 1.3339, "step": 5538 }, { "epoch": 0.7934393353387766, "grad_norm": 1.4209814071655273, "learning_rate": 2.155923289068048e-05, "loss": 1.4727, "step": 5539 }, { "epoch": 0.7935825812920785, "grad_norm": 1.0015904903411865, "learning_rate": 2.153046332538804e-05, "loss": 1.5235, "step": 5540 }, { "epoch": 0.7937258272453803, "grad_norm": 0.9848486185073853, "learning_rate": 2.1501710652645034e-05, "loss": 1.3215, "step": 5541 }, { "epoch": 0.7938690731986822, "grad_norm": 1.1095030307769775, "learning_rate": 2.1472974878641183e-05, "loss": 1.0968, "step": 5542 }, { "epoch": 0.794012319151984, "grad_norm": 1.1565525531768799, "learning_rate": 2.1444256009562602e-05, "loss": 1.4391, "step": 5543 }, { "epoch": 0.7941555651052857, "grad_norm": 1.272929072380066, "learning_rate": 2.1415554051591746e-05, "loss": 1.5163, "step": 5544 }, { "epoch": 0.7942988110585876, "grad_norm": 1.0581684112548828, "learning_rate": 2.1386869010907472e-05, "loss": 1.2667, "step": 5545 }, { "epoch": 0.7944420570118894, "grad_norm": 1.1608738899230957, "learning_rate": 2.1358200893684898e-05, "loss": 1.4381, "step": 5546 }, { "epoch": 0.7945853029651913, "grad_norm": 1.0682607889175415, "learning_rate": 2.1329549706095562e-05, "loss": 1.3557, "step": 5547 }, { "epoch": 0.7947285489184931, "grad_norm": 0.9385896325111389, "learning_rate": 2.1300915454307435e-05, "loss": 1.4743, "step": 5548 }, { "epoch": 0.7948717948717948, "grad_norm": 1.1274384260177612, "learning_rate": 2.1272298144484682e-05, "loss": 1.3438, "step": 5549 }, { "epoch": 0.7950150408250967, "grad_norm": 1.1000089645385742, "learning_rate": 2.124369778278794e-05, "loss": 1.5051, "step": 5550 }, { "epoch": 0.7951582867783985, "grad_norm": 1.0289571285247803, "learning_rate": 2.1215114375374144e-05, "loss": 1.5719, "step": 5551 }, { "epoch": 0.7953015327317003, "grad_norm": 1.1477240324020386, "learning_rate": 2.1186547928396594e-05, "loss": 1.383, "step": 5552 }, { "epoch": 0.7954447786850022, "grad_norm": 1.076686978340149, "learning_rate": 2.115799844800498e-05, "loss": 1.292, "step": 5553 }, { "epoch": 0.7955880246383039, "grad_norm": 1.0538661479949951, "learning_rate": 2.1129465940345206e-05, "loss": 1.3934, "step": 5554 }, { "epoch": 0.7957312705916058, "grad_norm": 1.0516835451126099, "learning_rate": 2.1100950411559706e-05, "loss": 1.4139, "step": 5555 }, { "epoch": 0.7958745165449076, "grad_norm": 1.0002833604812622, "learning_rate": 2.1072451867787146e-05, "loss": 1.3807, "step": 5556 }, { "epoch": 0.7960177624982094, "grad_norm": 1.0306679010391235, "learning_rate": 2.104397031516253e-05, "loss": 1.3901, "step": 5557 }, { "epoch": 0.7961610084515113, "grad_norm": 1.416664719581604, "learning_rate": 2.1015505759817223e-05, "loss": 1.3207, "step": 5558 }, { "epoch": 0.7963042544048131, "grad_norm": 1.305562138557434, "learning_rate": 2.098705820787901e-05, "loss": 1.4521, "step": 5559 }, { "epoch": 0.7964475003581148, "grad_norm": 1.0682446956634521, "learning_rate": 2.0958627665471865e-05, "loss": 1.3705, "step": 5560 }, { "epoch": 0.7965907463114167, "grad_norm": 1.4303133487701416, "learning_rate": 2.093021413871622e-05, "loss": 1.5328, "step": 5561 }, { "epoch": 0.7967339922647185, "grad_norm": 1.2248668670654297, "learning_rate": 2.0901817633728804e-05, "loss": 1.5614, "step": 5562 }, { "epoch": 0.7968772382180204, "grad_norm": 1.2573518753051758, "learning_rate": 2.087343815662267e-05, "loss": 1.2541, "step": 5563 }, { "epoch": 0.7970204841713222, "grad_norm": 1.0623184442520142, "learning_rate": 2.0845075713507222e-05, "loss": 1.4891, "step": 5564 }, { "epoch": 0.7971637301246239, "grad_norm": 1.1186460256576538, "learning_rate": 2.0816730310488186e-05, "loss": 1.2532, "step": 5565 }, { "epoch": 0.7973069760779258, "grad_norm": 1.0472114086151123, "learning_rate": 2.0788401953667668e-05, "loss": 1.477, "step": 5566 }, { "epoch": 0.7974502220312276, "grad_norm": 1.25272798538208, "learning_rate": 2.0760090649144005e-05, "loss": 1.1717, "step": 5567 }, { "epoch": 0.7975934679845295, "grad_norm": 1.0973775386810303, "learning_rate": 2.0731796403011906e-05, "loss": 1.5596, "step": 5568 }, { "epoch": 0.7977367139378313, "grad_norm": 1.1017022132873535, "learning_rate": 2.070351922136251e-05, "loss": 1.4256, "step": 5569 }, { "epoch": 0.797879959891133, "grad_norm": 1.1429880857467651, "learning_rate": 2.0675259110283117e-05, "loss": 1.5247, "step": 5570 }, { "epoch": 0.7980232058444349, "grad_norm": 1.049401879310608, "learning_rate": 2.0647016075857463e-05, "loss": 1.5903, "step": 5571 }, { "epoch": 0.7981664517977367, "grad_norm": 0.9920111894607544, "learning_rate": 2.0618790124165556e-05, "loss": 1.4359, "step": 5572 }, { "epoch": 0.7983096977510385, "grad_norm": 0.9339282512664795, "learning_rate": 2.059058126128376e-05, "loss": 1.378, "step": 5573 }, { "epoch": 0.7984529437043404, "grad_norm": 0.9228371977806091, "learning_rate": 2.0562389493284763e-05, "loss": 1.4882, "step": 5574 }, { "epoch": 0.7985961896576421, "grad_norm": 1.1618367433547974, "learning_rate": 2.0534214826237484e-05, "loss": 1.3113, "step": 5575 }, { "epoch": 0.798739435610944, "grad_norm": 1.3761992454528809, "learning_rate": 2.0506057266207313e-05, "loss": 1.4591, "step": 5576 }, { "epoch": 0.7988826815642458, "grad_norm": 0.9985799789428711, "learning_rate": 2.047791681925586e-05, "loss": 1.297, "step": 5577 }, { "epoch": 0.7990259275175476, "grad_norm": 1.0619736909866333, "learning_rate": 2.0449793491441028e-05, "loss": 1.2337, "step": 5578 }, { "epoch": 0.7991691734708495, "grad_norm": 1.008886456489563, "learning_rate": 2.0421687288817058e-05, "loss": 1.3267, "step": 5579 }, { "epoch": 0.7993124194241513, "grad_norm": 1.0852863788604736, "learning_rate": 2.0393598217434616e-05, "loss": 1.4054, "step": 5580 }, { "epoch": 0.799455665377453, "grad_norm": 1.1519148349761963, "learning_rate": 2.0365526283340508e-05, "loss": 1.418, "step": 5581 }, { "epoch": 0.7995989113307549, "grad_norm": 1.1324946880340576, "learning_rate": 2.033747149257793e-05, "loss": 1.3423, "step": 5582 }, { "epoch": 0.7997421572840567, "grad_norm": 1.0217424631118774, "learning_rate": 2.030943385118641e-05, "loss": 1.5086, "step": 5583 }, { "epoch": 0.7998854032373586, "grad_norm": 1.222145676612854, "learning_rate": 2.028141336520174e-05, "loss": 1.3491, "step": 5584 }, { "epoch": 0.8000286491906604, "grad_norm": 0.9856448173522949, "learning_rate": 2.0253410040656073e-05, "loss": 1.3835, "step": 5585 }, { "epoch": 0.8001718951439621, "grad_norm": 1.0147873163223267, "learning_rate": 2.0225423883577754e-05, "loss": 1.3378, "step": 5586 }, { "epoch": 0.800315141097264, "grad_norm": 1.2144800424575806, "learning_rate": 2.0197454899991573e-05, "loss": 1.3332, "step": 5587 }, { "epoch": 0.8004583870505658, "grad_norm": 1.1955982446670532, "learning_rate": 2.0169503095918586e-05, "loss": 1.301, "step": 5588 }, { "epoch": 0.8006016330038677, "grad_norm": 1.2471030950546265, "learning_rate": 2.014156847737605e-05, "loss": 1.1921, "step": 5589 }, { "epoch": 0.8007448789571695, "grad_norm": 0.966519832611084, "learning_rate": 2.0113651050377623e-05, "loss": 1.4768, "step": 5590 }, { "epoch": 0.8008881249104712, "grad_norm": 1.0456572771072388, "learning_rate": 2.0085750820933257e-05, "loss": 1.4942, "step": 5591 }, { "epoch": 0.8010313708637731, "grad_norm": 1.0134669542312622, "learning_rate": 2.005786779504917e-05, "loss": 1.3711, "step": 5592 }, { "epoch": 0.8011746168170749, "grad_norm": 1.0319499969482422, "learning_rate": 2.0030001978727874e-05, "loss": 1.4634, "step": 5593 }, { "epoch": 0.8013178627703768, "grad_norm": 1.1356571912765503, "learning_rate": 2.0002153377968213e-05, "loss": 1.377, "step": 5594 }, { "epoch": 0.8014611087236786, "grad_norm": 1.1101000308990479, "learning_rate": 1.997432199876531e-05, "loss": 1.3712, "step": 5595 }, { "epoch": 0.8016043546769803, "grad_norm": 1.2884794473648071, "learning_rate": 1.994650784711053e-05, "loss": 1.5032, "step": 5596 }, { "epoch": 0.8017476006302822, "grad_norm": 1.0528610944747925, "learning_rate": 1.9918710928991567e-05, "loss": 1.2989, "step": 5597 }, { "epoch": 0.801890846583584, "grad_norm": 1.105637788772583, "learning_rate": 1.9890931250392498e-05, "loss": 1.3532, "step": 5598 }, { "epoch": 0.8020340925368858, "grad_norm": 0.9321838021278381, "learning_rate": 1.9863168817293497e-05, "loss": 1.4691, "step": 5599 }, { "epoch": 0.8021773384901877, "grad_norm": 1.2732150554656982, "learning_rate": 1.983542363567118e-05, "loss": 1.314, "step": 5600 }, { "epoch": 0.8023205844434895, "grad_norm": 1.095652461051941, "learning_rate": 1.9807695711498385e-05, "loss": 1.5321, "step": 5601 }, { "epoch": 0.8024638303967913, "grad_norm": 1.01155424118042, "learning_rate": 1.9779985050744256e-05, "loss": 1.4224, "step": 5602 }, { "epoch": 0.8026070763500931, "grad_norm": 1.1057287454605103, "learning_rate": 1.9752291659374234e-05, "loss": 1.5719, "step": 5603 }, { "epoch": 0.8027503223033949, "grad_norm": 1.1701321601867676, "learning_rate": 1.9724615543349943e-05, "loss": 1.3276, "step": 5604 }, { "epoch": 0.8028935682566968, "grad_norm": 1.1319361925125122, "learning_rate": 1.9696956708629445e-05, "loss": 1.4887, "step": 5605 }, { "epoch": 0.8030368142099986, "grad_norm": 1.2079811096191406, "learning_rate": 1.9669315161167e-05, "loss": 1.2583, "step": 5606 }, { "epoch": 0.8031800601633003, "grad_norm": 1.1055455207824707, "learning_rate": 1.964169090691309e-05, "loss": 1.4886, "step": 5607 }, { "epoch": 0.8033233061166022, "grad_norm": 1.126715064048767, "learning_rate": 1.9614083951814554e-05, "loss": 1.2918, "step": 5608 }, { "epoch": 0.803466552069904, "grad_norm": 1.1366931200027466, "learning_rate": 1.958649430181455e-05, "loss": 1.4162, "step": 5609 }, { "epoch": 0.8036097980232059, "grad_norm": 1.0904452800750732, "learning_rate": 1.955892196285237e-05, "loss": 1.4557, "step": 5610 }, { "epoch": 0.8037530439765077, "grad_norm": 1.1547094583511353, "learning_rate": 1.9531366940863694e-05, "loss": 1.4017, "step": 5611 }, { "epoch": 0.8038962899298094, "grad_norm": 1.2169826030731201, "learning_rate": 1.9503829241780412e-05, "loss": 1.3618, "step": 5612 }, { "epoch": 0.8040395358831113, "grad_norm": 1.0908526182174683, "learning_rate": 1.9476308871530723e-05, "loss": 1.3414, "step": 5613 }, { "epoch": 0.8041827818364131, "grad_norm": 0.9590395092964172, "learning_rate": 1.944880583603912e-05, "loss": 1.6114, "step": 5614 }, { "epoch": 0.804326027789715, "grad_norm": 1.3139491081237793, "learning_rate": 1.9421320141226228e-05, "loss": 1.32, "step": 5615 }, { "epoch": 0.8044692737430168, "grad_norm": 1.2910867929458618, "learning_rate": 1.939385179300912e-05, "loss": 1.5992, "step": 5616 }, { "epoch": 0.8046125196963185, "grad_norm": 1.0733730792999268, "learning_rate": 1.9366400797301066e-05, "loss": 1.3882, "step": 5617 }, { "epoch": 0.8047557656496204, "grad_norm": 1.1911749839782715, "learning_rate": 1.9338967160011512e-05, "loss": 1.3702, "step": 5618 }, { "epoch": 0.8048990116029222, "grad_norm": 1.1243938207626343, "learning_rate": 1.9311550887046282e-05, "loss": 1.3426, "step": 5619 }, { "epoch": 0.805042257556224, "grad_norm": 1.0868338346481323, "learning_rate": 1.928415198430742e-05, "loss": 1.4727, "step": 5620 }, { "epoch": 0.8051855035095259, "grad_norm": 0.9629149436950684, "learning_rate": 1.925677045769322e-05, "loss": 1.574, "step": 5621 }, { "epoch": 0.8053287494628276, "grad_norm": 0.9636642336845398, "learning_rate": 1.9229406313098264e-05, "loss": 1.4913, "step": 5622 }, { "epoch": 0.8054719954161295, "grad_norm": 1.1252906322479248, "learning_rate": 1.9202059556413366e-05, "loss": 1.4437, "step": 5623 }, { "epoch": 0.8056152413694313, "grad_norm": 1.0094927549362183, "learning_rate": 1.9174730193525626e-05, "loss": 1.3751, "step": 5624 }, { "epoch": 0.8057584873227331, "grad_norm": 1.037348985671997, "learning_rate": 1.9147418230318316e-05, "loss": 1.3615, "step": 5625 }, { "epoch": 0.805901733276035, "grad_norm": 1.2424753904342651, "learning_rate": 1.9120123672671086e-05, "loss": 1.4923, "step": 5626 }, { "epoch": 0.8060449792293368, "grad_norm": 1.0879966020584106, "learning_rate": 1.9092846526459797e-05, "loss": 1.4476, "step": 5627 }, { "epoch": 0.8061882251826386, "grad_norm": 0.9539452195167542, "learning_rate": 1.906558679755649e-05, "loss": 1.4783, "step": 5628 }, { "epoch": 0.8063314711359404, "grad_norm": 1.1715091466903687, "learning_rate": 1.9038344491829495e-05, "loss": 1.5123, "step": 5629 }, { "epoch": 0.8064747170892422, "grad_norm": 0.9144424796104431, "learning_rate": 1.9011119615143492e-05, "loss": 1.597, "step": 5630 }, { "epoch": 0.8066179630425441, "grad_norm": 1.0063729286193848, "learning_rate": 1.898391217335924e-05, "loss": 1.5628, "step": 5631 }, { "epoch": 0.8067612089958459, "grad_norm": 1.1106654405593872, "learning_rate": 1.8956722172333875e-05, "loss": 1.3737, "step": 5632 }, { "epoch": 0.8069044549491476, "grad_norm": 1.0304570198059082, "learning_rate": 1.8929549617920716e-05, "loss": 1.4638, "step": 5633 }, { "epoch": 0.8070477009024495, "grad_norm": 1.1444005966186523, "learning_rate": 1.8902394515969335e-05, "loss": 1.3828, "step": 5634 }, { "epoch": 0.8071909468557513, "grad_norm": 1.0587037801742554, "learning_rate": 1.8875256872325587e-05, "loss": 1.3434, "step": 5635 }, { "epoch": 0.8073341928090532, "grad_norm": 1.166538119316101, "learning_rate": 1.884813669283145e-05, "loss": 1.4208, "step": 5636 }, { "epoch": 0.807477438762355, "grad_norm": 0.9851470589637756, "learning_rate": 1.882103398332533e-05, "loss": 1.4393, "step": 5637 }, { "epoch": 0.8076206847156567, "grad_norm": 1.0796467065811157, "learning_rate": 1.8793948749641744e-05, "loss": 1.5337, "step": 5638 }, { "epoch": 0.8077639306689586, "grad_norm": 1.5454179048538208, "learning_rate": 1.8766880997611424e-05, "loss": 1.3759, "step": 5639 }, { "epoch": 0.8079071766222604, "grad_norm": 1.0702461004257202, "learning_rate": 1.8739830733061413e-05, "loss": 1.4806, "step": 5640 }, { "epoch": 0.8080504225755623, "grad_norm": 1.007646918296814, "learning_rate": 1.8712797961814975e-05, "loss": 1.3136, "step": 5641 }, { "epoch": 0.8081936685288641, "grad_norm": 1.0816047191619873, "learning_rate": 1.8685782689691587e-05, "loss": 1.4315, "step": 5642 }, { "epoch": 0.8083369144821658, "grad_norm": 0.9248777031898499, "learning_rate": 1.865878492250698e-05, "loss": 1.4139, "step": 5643 }, { "epoch": 0.8084801604354677, "grad_norm": 1.1594047546386719, "learning_rate": 1.8631804666073094e-05, "loss": 1.3085, "step": 5644 }, { "epoch": 0.8086234063887695, "grad_norm": 1.2599555253982544, "learning_rate": 1.8604841926198135e-05, "loss": 1.2833, "step": 5645 }, { "epoch": 0.8087666523420713, "grad_norm": 1.0698325634002686, "learning_rate": 1.857789670868647e-05, "loss": 1.5376, "step": 5646 }, { "epoch": 0.8089098982953732, "grad_norm": 1.0841995477676392, "learning_rate": 1.8550969019338725e-05, "loss": 1.3287, "step": 5647 }, { "epoch": 0.809053144248675, "grad_norm": 1.1984270811080933, "learning_rate": 1.8524058863951854e-05, "loss": 1.2035, "step": 5648 }, { "epoch": 0.8091963902019768, "grad_norm": 1.0743377208709717, "learning_rate": 1.8497166248318876e-05, "loss": 1.4306, "step": 5649 }, { "epoch": 0.8093396361552786, "grad_norm": 1.3287091255187988, "learning_rate": 1.8470291178229116e-05, "loss": 1.4562, "step": 5650 }, { "epoch": 0.8094828821085804, "grad_norm": 1.0762503147125244, "learning_rate": 1.8443433659468123e-05, "loss": 1.4059, "step": 5651 }, { "epoch": 0.8096261280618823, "grad_norm": 1.1955372095108032, "learning_rate": 1.841659369781764e-05, "loss": 1.2773, "step": 5652 }, { "epoch": 0.8097693740151841, "grad_norm": 1.196042537689209, "learning_rate": 1.838977129905569e-05, "loss": 1.3151, "step": 5653 }, { "epoch": 0.8099126199684858, "grad_norm": 1.1636439561843872, "learning_rate": 1.83629664689564e-05, "loss": 1.3429, "step": 5654 }, { "epoch": 0.8100558659217877, "grad_norm": 1.153066873550415, "learning_rate": 1.833617921329024e-05, "loss": 1.4177, "step": 5655 }, { "epoch": 0.8101991118750895, "grad_norm": 1.0415980815887451, "learning_rate": 1.830940953782385e-05, "loss": 1.3, "step": 5656 }, { "epoch": 0.8103423578283914, "grad_norm": 1.0865952968597412, "learning_rate": 1.828265744832004e-05, "loss": 1.4011, "step": 5657 }, { "epoch": 0.8104856037816932, "grad_norm": 1.185973048210144, "learning_rate": 1.8255922950537872e-05, "loss": 1.4024, "step": 5658 }, { "epoch": 0.8106288497349949, "grad_norm": 1.2533318996429443, "learning_rate": 1.8229206050232684e-05, "loss": 1.2632, "step": 5659 }, { "epoch": 0.8107720956882968, "grad_norm": 1.1986994743347168, "learning_rate": 1.820250675315589e-05, "loss": 1.3096, "step": 5660 }, { "epoch": 0.8109153416415986, "grad_norm": 1.0425302982330322, "learning_rate": 1.817582506505523e-05, "loss": 1.2316, "step": 5661 }, { "epoch": 0.8110585875949005, "grad_norm": 1.0855088233947754, "learning_rate": 1.8149160991674597e-05, "loss": 1.4734, "step": 5662 }, { "epoch": 0.8112018335482023, "grad_norm": 1.2144453525543213, "learning_rate": 1.81225145387541e-05, "loss": 1.471, "step": 5663 }, { "epoch": 0.811345079501504, "grad_norm": 1.061293363571167, "learning_rate": 1.80958857120301e-05, "loss": 1.2886, "step": 5664 }, { "epoch": 0.8114883254548059, "grad_norm": 0.9511222243309021, "learning_rate": 1.8069274517235047e-05, "loss": 1.4152, "step": 5665 }, { "epoch": 0.8116315714081077, "grad_norm": 1.3765077590942383, "learning_rate": 1.8042680960097735e-05, "loss": 1.4206, "step": 5666 }, { "epoch": 0.8117748173614096, "grad_norm": 1.3805012702941895, "learning_rate": 1.8016105046343123e-05, "loss": 1.4367, "step": 5667 }, { "epoch": 0.8119180633147114, "grad_norm": 0.9719917178153992, "learning_rate": 1.798954678169228e-05, "loss": 1.223, "step": 5668 }, { "epoch": 0.8120613092680132, "grad_norm": 1.0138293504714966, "learning_rate": 1.7963006171862562e-05, "loss": 1.3879, "step": 5669 }, { "epoch": 0.812204555221315, "grad_norm": 1.1095738410949707, "learning_rate": 1.7936483222567523e-05, "loss": 1.4462, "step": 5670 }, { "epoch": 0.8123478011746168, "grad_norm": 0.9974596500396729, "learning_rate": 1.7909977939516887e-05, "loss": 1.3172, "step": 5671 }, { "epoch": 0.8124910471279186, "grad_norm": 1.0114816427230835, "learning_rate": 1.7883490328416587e-05, "loss": 1.368, "step": 5672 }, { "epoch": 0.8126342930812205, "grad_norm": 1.3137753009796143, "learning_rate": 1.785702039496875e-05, "loss": 1.4381, "step": 5673 }, { "epoch": 0.8127775390345223, "grad_norm": 1.0969570875167847, "learning_rate": 1.783056814487172e-05, "loss": 1.6914, "step": 5674 }, { "epoch": 0.812920784987824, "grad_norm": 1.1589391231536865, "learning_rate": 1.780413358381997e-05, "loss": 1.2729, "step": 5675 }, { "epoch": 0.8130640309411259, "grad_norm": 0.9956794381141663, "learning_rate": 1.7777716717504213e-05, "loss": 1.3966, "step": 5676 }, { "epoch": 0.8132072768944277, "grad_norm": 1.1816606521606445, "learning_rate": 1.775131755161139e-05, "loss": 1.2947, "step": 5677 }, { "epoch": 0.8133505228477296, "grad_norm": 0.9762355089187622, "learning_rate": 1.772493609182455e-05, "loss": 1.5738, "step": 5678 }, { "epoch": 0.8134937688010314, "grad_norm": 0.9318240284919739, "learning_rate": 1.7698572343822973e-05, "loss": 1.5495, "step": 5679 }, { "epoch": 0.8136370147543331, "grad_norm": 1.272712230682373, "learning_rate": 1.7672226313282126e-05, "loss": 1.3791, "step": 5680 }, { "epoch": 0.813780260707635, "grad_norm": 0.9423567652702332, "learning_rate": 1.7645898005873663e-05, "loss": 1.389, "step": 5681 }, { "epoch": 0.8139235066609368, "grad_norm": 1.0018702745437622, "learning_rate": 1.7619587427265405e-05, "loss": 1.327, "step": 5682 }, { "epoch": 0.8140667526142387, "grad_norm": 0.9877719283103943, "learning_rate": 1.7593294583121377e-05, "loss": 1.4499, "step": 5683 }, { "epoch": 0.8142099985675405, "grad_norm": 0.9803469181060791, "learning_rate": 1.7567019479101776e-05, "loss": 1.4247, "step": 5684 }, { "epoch": 0.8143532445208422, "grad_norm": 1.0692263841629028, "learning_rate": 1.754076212086301e-05, "loss": 1.2784, "step": 5685 }, { "epoch": 0.8144964904741441, "grad_norm": 1.0264326333999634, "learning_rate": 1.7514522514057553e-05, "loss": 1.3225, "step": 5686 }, { "epoch": 0.8146397364274459, "grad_norm": 1.051205039024353, "learning_rate": 1.7488300664334236e-05, "loss": 1.4064, "step": 5687 }, { "epoch": 0.8147829823807478, "grad_norm": 0.9736294746398926, "learning_rate": 1.746209657733795e-05, "loss": 1.2956, "step": 5688 }, { "epoch": 0.8149262283340496, "grad_norm": 1.0348933935165405, "learning_rate": 1.7435910258709752e-05, "loss": 1.3572, "step": 5689 }, { "epoch": 0.8150694742873514, "grad_norm": 0.9968773722648621, "learning_rate": 1.7409741714086948e-05, "loss": 1.4103, "step": 5690 }, { "epoch": 0.8152127202406532, "grad_norm": 1.0300939083099365, "learning_rate": 1.7383590949102945e-05, "loss": 1.3176, "step": 5691 }, { "epoch": 0.815355966193955, "grad_norm": 1.0646156072616577, "learning_rate": 1.7357457969387368e-05, "loss": 1.2789, "step": 5692 }, { "epoch": 0.8154992121472568, "grad_norm": 1.1840293407440186, "learning_rate": 1.7331342780566017e-05, "loss": 1.2092, "step": 5693 }, { "epoch": 0.8156424581005587, "grad_norm": 0.9098217487335205, "learning_rate": 1.7305245388260826e-05, "loss": 1.4562, "step": 5694 }, { "epoch": 0.8157857040538605, "grad_norm": 1.110976219177246, "learning_rate": 1.7279165798089957e-05, "loss": 1.5646, "step": 5695 }, { "epoch": 0.8159289500071623, "grad_norm": 1.040792465209961, "learning_rate": 1.7253104015667643e-05, "loss": 1.5351, "step": 5696 }, { "epoch": 0.8160721959604641, "grad_norm": 1.0433226823806763, "learning_rate": 1.7227060046604336e-05, "loss": 1.2707, "step": 5697 }, { "epoch": 0.8162154419137659, "grad_norm": 0.9411223530769348, "learning_rate": 1.7201033896506746e-05, "loss": 1.3303, "step": 5698 }, { "epoch": 0.8163586878670678, "grad_norm": 1.2658718824386597, "learning_rate": 1.7175025570977577e-05, "loss": 1.417, "step": 5699 }, { "epoch": 0.8165019338203696, "grad_norm": 1.0115282535552979, "learning_rate": 1.7149035075615794e-05, "loss": 1.4125, "step": 5700 }, { "epoch": 0.8166451797736713, "grad_norm": 1.124245524406433, "learning_rate": 1.7123062416016524e-05, "loss": 1.3277, "step": 5701 }, { "epoch": 0.8167884257269732, "grad_norm": 0.9520460367202759, "learning_rate": 1.7097107597771024e-05, "loss": 1.3215, "step": 5702 }, { "epoch": 0.816931671680275, "grad_norm": 1.044511079788208, "learning_rate": 1.707117062646676e-05, "loss": 1.291, "step": 5703 }, { "epoch": 0.8170749176335769, "grad_norm": 1.072210431098938, "learning_rate": 1.7045251507687232e-05, "loss": 1.2702, "step": 5704 }, { "epoch": 0.8172181635868787, "grad_norm": 1.1078007221221924, "learning_rate": 1.7019350247012278e-05, "loss": 1.2887, "step": 5705 }, { "epoch": 0.8173614095401804, "grad_norm": 1.2150322198867798, "learning_rate": 1.699346685001778e-05, "loss": 1.4202, "step": 5706 }, { "epoch": 0.8175046554934823, "grad_norm": 1.187727451324463, "learning_rate": 1.696760132227576e-05, "loss": 1.4804, "step": 5707 }, { "epoch": 0.8176479014467841, "grad_norm": 1.188535451889038, "learning_rate": 1.694175366935442e-05, "loss": 1.2612, "step": 5708 }, { "epoch": 0.817791147400086, "grad_norm": 1.278638243675232, "learning_rate": 1.6915923896818188e-05, "loss": 1.5195, "step": 5709 }, { "epoch": 0.8179343933533878, "grad_norm": 1.0303146839141846, "learning_rate": 1.6890112010227498e-05, "loss": 1.4401, "step": 5710 }, { "epoch": 0.8180776393066896, "grad_norm": 1.033528447151184, "learning_rate": 1.6864318015139047e-05, "loss": 1.2995, "step": 5711 }, { "epoch": 0.8182208852599914, "grad_norm": 1.2521709203720093, "learning_rate": 1.6838541917105632e-05, "loss": 1.3439, "step": 5712 }, { "epoch": 0.8183641312132932, "grad_norm": 1.2076901197433472, "learning_rate": 1.6812783721676228e-05, "loss": 1.3262, "step": 5713 }, { "epoch": 0.818507377166595, "grad_norm": 1.058674693107605, "learning_rate": 1.6787043434395942e-05, "loss": 1.4615, "step": 5714 }, { "epoch": 0.8186506231198969, "grad_norm": 1.0597326755523682, "learning_rate": 1.6761321060805957e-05, "loss": 1.3264, "step": 5715 }, { "epoch": 0.8187938690731987, "grad_norm": 1.0054484605789185, "learning_rate": 1.6735616606443728e-05, "loss": 1.1676, "step": 5716 }, { "epoch": 0.8189371150265005, "grad_norm": 1.0756042003631592, "learning_rate": 1.6709930076842805e-05, "loss": 1.4556, "step": 5717 }, { "epoch": 0.8190803609798023, "grad_norm": 1.020453929901123, "learning_rate": 1.6684261477532793e-05, "loss": 1.3462, "step": 5718 }, { "epoch": 0.8192236069331041, "grad_norm": 1.1664289236068726, "learning_rate": 1.6658610814039544e-05, "loss": 1.4291, "step": 5719 }, { "epoch": 0.819366852886406, "grad_norm": 1.3196290731430054, "learning_rate": 1.663297809188501e-05, "loss": 1.5667, "step": 5720 }, { "epoch": 0.8195100988397078, "grad_norm": 1.2542836666107178, "learning_rate": 1.6607363316587277e-05, "loss": 1.3736, "step": 5721 }, { "epoch": 0.8196533447930096, "grad_norm": 1.0937902927398682, "learning_rate": 1.6581766493660578e-05, "loss": 1.2699, "step": 5722 }, { "epoch": 0.8197965907463114, "grad_norm": 0.9938157200813293, "learning_rate": 1.6556187628615273e-05, "loss": 1.3481, "step": 5723 }, { "epoch": 0.8199398366996132, "grad_norm": 1.0735880136489868, "learning_rate": 1.6530626726957877e-05, "loss": 1.471, "step": 5724 }, { "epoch": 0.8200830826529151, "grad_norm": 1.0578898191452026, "learning_rate": 1.650508379419098e-05, "loss": 1.5773, "step": 5725 }, { "epoch": 0.8202263286062169, "grad_norm": 1.1096580028533936, "learning_rate": 1.6479558835813334e-05, "loss": 1.4956, "step": 5726 }, { "epoch": 0.8203695745595186, "grad_norm": 1.0737659931182861, "learning_rate": 1.6454051857319906e-05, "loss": 1.58, "step": 5727 }, { "epoch": 0.8205128205128205, "grad_norm": 1.0251860618591309, "learning_rate": 1.6428562864201658e-05, "loss": 1.4514, "step": 5728 }, { "epoch": 0.8206560664661223, "grad_norm": 0.9603286981582642, "learning_rate": 1.6403091861945753e-05, "loss": 1.4944, "step": 5729 }, { "epoch": 0.8207993124194242, "grad_norm": 1.2171103954315186, "learning_rate": 1.637763885603546e-05, "loss": 1.4356, "step": 5730 }, { "epoch": 0.820942558372726, "grad_norm": 0.9659716486930847, "learning_rate": 1.6352203851950198e-05, "loss": 1.4968, "step": 5731 }, { "epoch": 0.8210858043260277, "grad_norm": 1.1402076482772827, "learning_rate": 1.63267868551655e-05, "loss": 1.4284, "step": 5732 }, { "epoch": 0.8212290502793296, "grad_norm": 1.033719539642334, "learning_rate": 1.6301387871152963e-05, "loss": 1.2219, "step": 5733 }, { "epoch": 0.8213722962326314, "grad_norm": 1.0863749980926514, "learning_rate": 1.6276006905380413e-05, "loss": 1.5239, "step": 5734 }, { "epoch": 0.8215155421859333, "grad_norm": 0.996165931224823, "learning_rate": 1.625064396331176e-05, "loss": 1.3887, "step": 5735 }, { "epoch": 0.8216587881392351, "grad_norm": 1.191558599472046, "learning_rate": 1.622529905040696e-05, "loss": 1.5328, "step": 5736 }, { "epoch": 0.8218020340925369, "grad_norm": 1.1870588064193726, "learning_rate": 1.6199972172122147e-05, "loss": 1.4848, "step": 5737 }, { "epoch": 0.8219452800458387, "grad_norm": 0.956283450126648, "learning_rate": 1.6174663333909646e-05, "loss": 1.4075, "step": 5738 }, { "epoch": 0.8220885259991405, "grad_norm": 1.1312839984893799, "learning_rate": 1.6149372541217755e-05, "loss": 1.458, "step": 5739 }, { "epoch": 0.8222317719524423, "grad_norm": 1.3449602127075195, "learning_rate": 1.6124099799490968e-05, "loss": 1.4481, "step": 5740 }, { "epoch": 0.8223750179057442, "grad_norm": 1.3037852048873901, "learning_rate": 1.6098845114169893e-05, "loss": 1.4553, "step": 5741 }, { "epoch": 0.822518263859046, "grad_norm": 1.0383838415145874, "learning_rate": 1.6073608490691228e-05, "loss": 1.538, "step": 5742 }, { "epoch": 0.8226615098123478, "grad_norm": 1.051731824874878, "learning_rate": 1.604838993448783e-05, "loss": 1.328, "step": 5743 }, { "epoch": 0.8228047557656496, "grad_norm": 1.1407042741775513, "learning_rate": 1.602318945098855e-05, "loss": 1.285, "step": 5744 }, { "epoch": 0.8229480017189514, "grad_norm": 1.0576179027557373, "learning_rate": 1.5998007045618502e-05, "loss": 1.2916, "step": 5745 }, { "epoch": 0.8230912476722533, "grad_norm": 1.2697982788085938, "learning_rate": 1.597284272379883e-05, "loss": 1.3054, "step": 5746 }, { "epoch": 0.8232344936255551, "grad_norm": 1.0967398881912231, "learning_rate": 1.594769649094675e-05, "loss": 1.3967, "step": 5747 }, { "epoch": 0.8233777395788568, "grad_norm": 1.1864540576934814, "learning_rate": 1.5922568352475642e-05, "loss": 1.5568, "step": 5748 }, { "epoch": 0.8235209855321587, "grad_norm": 1.9493398666381836, "learning_rate": 1.5897458313794966e-05, "loss": 1.3124, "step": 5749 }, { "epoch": 0.8236642314854605, "grad_norm": 1.1356884241104126, "learning_rate": 1.587236638031031e-05, "loss": 1.3017, "step": 5750 }, { "epoch": 0.8238074774387624, "grad_norm": 1.2949925661087036, "learning_rate": 1.5847292557423344e-05, "loss": 1.289, "step": 5751 }, { "epoch": 0.8239507233920642, "grad_norm": 0.91130131483078, "learning_rate": 1.5822236850531824e-05, "loss": 1.2107, "step": 5752 }, { "epoch": 0.8240939693453659, "grad_norm": 0.8751494288444519, "learning_rate": 1.579719926502966e-05, "loss": 1.4364, "step": 5753 }, { "epoch": 0.8242372152986678, "grad_norm": 1.0112683773040771, "learning_rate": 1.5772179806306743e-05, "loss": 1.333, "step": 5754 }, { "epoch": 0.8243804612519696, "grad_norm": 1.006009817123413, "learning_rate": 1.5747178479749236e-05, "loss": 1.2797, "step": 5755 }, { "epoch": 0.8245237072052715, "grad_norm": 0.9960899949073792, "learning_rate": 1.5722195290739285e-05, "loss": 1.3951, "step": 5756 }, { "epoch": 0.8246669531585733, "grad_norm": 1.0580023527145386, "learning_rate": 1.5697230244655114e-05, "loss": 1.3871, "step": 5757 }, { "epoch": 0.8248101991118751, "grad_norm": 1.1296244859695435, "learning_rate": 1.5672283346871074e-05, "loss": 1.406, "step": 5758 }, { "epoch": 0.8249534450651769, "grad_norm": 1.1733819246292114, "learning_rate": 1.564735460275769e-05, "loss": 1.4868, "step": 5759 }, { "epoch": 0.8250966910184787, "grad_norm": 1.0858979225158691, "learning_rate": 1.562244401768144e-05, "loss": 1.2394, "step": 5760 }, { "epoch": 0.8252399369717806, "grad_norm": 1.012631893157959, "learning_rate": 1.5597551597004966e-05, "loss": 1.3404, "step": 5761 }, { "epoch": 0.8253831829250824, "grad_norm": 1.171512484550476, "learning_rate": 1.5572677346087004e-05, "loss": 1.2798, "step": 5762 }, { "epoch": 0.8255264288783842, "grad_norm": 1.2161369323730469, "learning_rate": 1.5547821270282346e-05, "loss": 1.5199, "step": 5763 }, { "epoch": 0.825669674831686, "grad_norm": 1.1851780414581299, "learning_rate": 1.5522983374941937e-05, "loss": 1.2706, "step": 5764 }, { "epoch": 0.8258129207849878, "grad_norm": 1.0883551836013794, "learning_rate": 1.549816366541268e-05, "loss": 1.1912, "step": 5765 }, { "epoch": 0.8259561667382896, "grad_norm": 1.0480178594589233, "learning_rate": 1.5473362147037706e-05, "loss": 1.4259, "step": 5766 }, { "epoch": 0.8260994126915915, "grad_norm": 1.109197735786438, "learning_rate": 1.544857882515617e-05, "loss": 1.3174, "step": 5767 }, { "epoch": 0.8262426586448933, "grad_norm": 1.0364460945129395, "learning_rate": 1.5423813705103275e-05, "loss": 1.4802, "step": 5768 }, { "epoch": 0.826385904598195, "grad_norm": 1.0377068519592285, "learning_rate": 1.539906679221035e-05, "loss": 1.5347, "step": 5769 }, { "epoch": 0.8265291505514969, "grad_norm": 1.0012344121932983, "learning_rate": 1.537433809180481e-05, "loss": 1.4465, "step": 5770 }, { "epoch": 0.8266723965047987, "grad_norm": 1.3207192420959473, "learning_rate": 1.5349627609210104e-05, "loss": 1.3257, "step": 5771 }, { "epoch": 0.8268156424581006, "grad_norm": 1.2061342000961304, "learning_rate": 1.5324935349745805e-05, "loss": 1.4627, "step": 5772 }, { "epoch": 0.8269588884114024, "grad_norm": 1.111749291419983, "learning_rate": 1.5300261318727537e-05, "loss": 1.3454, "step": 5773 }, { "epoch": 0.8271021343647041, "grad_norm": 0.9952139258384705, "learning_rate": 1.5275605521467052e-05, "loss": 1.3543, "step": 5774 }, { "epoch": 0.827245380318006, "grad_norm": 0.9418818950653076, "learning_rate": 1.5250967963272056e-05, "loss": 1.4071, "step": 5775 }, { "epoch": 0.8273886262713078, "grad_norm": 1.0336658954620361, "learning_rate": 1.5226348649446432e-05, "loss": 1.5182, "step": 5776 }, { "epoch": 0.8275318722246097, "grad_norm": 1.0942680835723877, "learning_rate": 1.520174758529016e-05, "loss": 1.531, "step": 5777 }, { "epoch": 0.8276751181779115, "grad_norm": 1.027611494064331, "learning_rate": 1.5177164776099184e-05, "loss": 1.4072, "step": 5778 }, { "epoch": 0.8278183641312133, "grad_norm": 1.1368470191955566, "learning_rate": 1.5152600227165591e-05, "loss": 1.3798, "step": 5779 }, { "epoch": 0.8279616100845151, "grad_norm": 1.0480561256408691, "learning_rate": 1.5128053943777532e-05, "loss": 1.4848, "step": 5780 }, { "epoch": 0.8281048560378169, "grad_norm": 0.920052170753479, "learning_rate": 1.5103525931219186e-05, "loss": 1.3972, "step": 5781 }, { "epoch": 0.8282481019911188, "grad_norm": 1.0311025381088257, "learning_rate": 1.5079016194770889e-05, "loss": 1.3737, "step": 5782 }, { "epoch": 0.8283913479444206, "grad_norm": 1.2496471405029297, "learning_rate": 1.5054524739708876e-05, "loss": 1.4451, "step": 5783 }, { "epoch": 0.8285345938977224, "grad_norm": 1.0876752138137817, "learning_rate": 1.5030051571305637e-05, "loss": 1.4355, "step": 5784 }, { "epoch": 0.8286778398510242, "grad_norm": 1.1764819622039795, "learning_rate": 1.5005596694829637e-05, "loss": 1.6363, "step": 5785 }, { "epoch": 0.828821085804326, "grad_norm": 1.175207495689392, "learning_rate": 1.4981160115545367e-05, "loss": 1.1861, "step": 5786 }, { "epoch": 0.8289643317576278, "grad_norm": 1.0424830913543701, "learning_rate": 1.4956741838713406e-05, "loss": 1.4001, "step": 5787 }, { "epoch": 0.8291075777109297, "grad_norm": 1.267825961112976, "learning_rate": 1.4932341869590483e-05, "loss": 1.3516, "step": 5788 }, { "epoch": 0.8292508236642315, "grad_norm": 0.9715421199798584, "learning_rate": 1.4907960213429239e-05, "loss": 1.7048, "step": 5789 }, { "epoch": 0.8293940696175333, "grad_norm": 1.1823830604553223, "learning_rate": 1.4883596875478457e-05, "loss": 1.4406, "step": 5790 }, { "epoch": 0.8295373155708351, "grad_norm": 1.1496598720550537, "learning_rate": 1.485925186098296e-05, "loss": 1.2746, "step": 5791 }, { "epoch": 0.8296805615241369, "grad_norm": 0.9640195369720459, "learning_rate": 1.4834925175183635e-05, "loss": 1.3106, "step": 5792 }, { "epoch": 0.8298238074774388, "grad_norm": 0.9685528874397278, "learning_rate": 1.4810616823317425e-05, "loss": 1.4178, "step": 5793 }, { "epoch": 0.8299670534307406, "grad_norm": 1.0400193929672241, "learning_rate": 1.4786326810617268e-05, "loss": 1.2435, "step": 5794 }, { "epoch": 0.8301102993840423, "grad_norm": 1.1492332220077515, "learning_rate": 1.476205514231226e-05, "loss": 1.462, "step": 5795 }, { "epoch": 0.8302535453373442, "grad_norm": 1.2634183168411255, "learning_rate": 1.4737801823627485e-05, "loss": 1.3799, "step": 5796 }, { "epoch": 0.830396791290646, "grad_norm": 1.0642378330230713, "learning_rate": 1.4713566859784045e-05, "loss": 1.5513, "step": 5797 }, { "epoch": 0.8305400372439479, "grad_norm": 1.0573594570159912, "learning_rate": 1.4689350255999146e-05, "loss": 1.3058, "step": 5798 }, { "epoch": 0.8306832831972497, "grad_norm": 1.164003610610962, "learning_rate": 1.4665152017486028e-05, "loss": 1.3271, "step": 5799 }, { "epoch": 0.8308265291505516, "grad_norm": 1.1846450567245483, "learning_rate": 1.4640972149453969e-05, "loss": 1.3016, "step": 5800 }, { "epoch": 0.8309697751038533, "grad_norm": 1.4839425086975098, "learning_rate": 1.4616810657108304e-05, "loss": 1.272, "step": 5801 }, { "epoch": 0.8311130210571551, "grad_norm": 1.2670466899871826, "learning_rate": 1.4592667545650396e-05, "loss": 1.2919, "step": 5802 }, { "epoch": 0.831256267010457, "grad_norm": 1.0132611989974976, "learning_rate": 1.4568542820277686e-05, "loss": 1.4328, "step": 5803 }, { "epoch": 0.8313995129637588, "grad_norm": 1.1387439966201782, "learning_rate": 1.4544436486183577e-05, "loss": 1.4944, "step": 5804 }, { "epoch": 0.8315427589170606, "grad_norm": 1.0245195627212524, "learning_rate": 1.4520348548557583e-05, "loss": 1.3372, "step": 5805 }, { "epoch": 0.8316860048703624, "grad_norm": 1.5224705934524536, "learning_rate": 1.449627901258529e-05, "loss": 1.4102, "step": 5806 }, { "epoch": 0.8318292508236642, "grad_norm": 1.0465983152389526, "learning_rate": 1.4472227883448219e-05, "loss": 1.6267, "step": 5807 }, { "epoch": 0.831972496776966, "grad_norm": 1.0657180547714233, "learning_rate": 1.4448195166324008e-05, "loss": 1.4945, "step": 5808 }, { "epoch": 0.8321157427302679, "grad_norm": 0.9429823756217957, "learning_rate": 1.4424180866386283e-05, "loss": 1.524, "step": 5809 }, { "epoch": 0.8322589886835697, "grad_norm": 1.0257388353347778, "learning_rate": 1.4400184988804754e-05, "loss": 1.4285, "step": 5810 }, { "epoch": 0.8324022346368715, "grad_norm": 1.042864203453064, "learning_rate": 1.4376207538745134e-05, "loss": 1.4724, "step": 5811 }, { "epoch": 0.8325454805901733, "grad_norm": 1.0017151832580566, "learning_rate": 1.4352248521369161e-05, "loss": 1.2386, "step": 5812 }, { "epoch": 0.8326887265434751, "grad_norm": 1.0717041492462158, "learning_rate": 1.432830794183464e-05, "loss": 1.3436, "step": 5813 }, { "epoch": 0.832831972496777, "grad_norm": 1.0257350206375122, "learning_rate": 1.4304385805295384e-05, "loss": 1.4052, "step": 5814 }, { "epoch": 0.8329752184500788, "grad_norm": 1.087152361869812, "learning_rate": 1.4280482116901195e-05, "loss": 1.6241, "step": 5815 }, { "epoch": 0.8331184644033806, "grad_norm": 1.0746204853057861, "learning_rate": 1.425659688179799e-05, "loss": 1.3253, "step": 5816 }, { "epoch": 0.8332617103566824, "grad_norm": 1.1218425035476685, "learning_rate": 1.4232730105127689e-05, "loss": 1.4443, "step": 5817 }, { "epoch": 0.8334049563099842, "grad_norm": 1.0890130996704102, "learning_rate": 1.420888179202815e-05, "loss": 1.309, "step": 5818 }, { "epoch": 0.8335482022632861, "grad_norm": 1.0417159795761108, "learning_rate": 1.4185051947633377e-05, "loss": 1.4817, "step": 5819 }, { "epoch": 0.8336914482165879, "grad_norm": 1.1337549686431885, "learning_rate": 1.416124057707331e-05, "loss": 1.3495, "step": 5820 }, { "epoch": 0.8338346941698896, "grad_norm": 1.3896255493164062, "learning_rate": 1.413744768547398e-05, "loss": 1.3021, "step": 5821 }, { "epoch": 0.8339779401231915, "grad_norm": 1.1626311540603638, "learning_rate": 1.4113673277957395e-05, "loss": 1.4148, "step": 5822 }, { "epoch": 0.8341211860764933, "grad_norm": 1.144176959991455, "learning_rate": 1.408991735964159e-05, "loss": 1.4197, "step": 5823 }, { "epoch": 0.8342644320297952, "grad_norm": 1.0574923753738403, "learning_rate": 1.4066179935640666e-05, "loss": 1.3387, "step": 5824 }, { "epoch": 0.834407677983097, "grad_norm": 1.0092427730560303, "learning_rate": 1.4042461011064634e-05, "loss": 1.3207, "step": 5825 }, { "epoch": 0.8345509239363988, "grad_norm": 1.3478448390960693, "learning_rate": 1.401876059101962e-05, "loss": 1.4324, "step": 5826 }, { "epoch": 0.8346941698897006, "grad_norm": 1.2492777109146118, "learning_rate": 1.3995078680607776e-05, "loss": 1.331, "step": 5827 }, { "epoch": 0.8348374158430024, "grad_norm": 0.9944934844970703, "learning_rate": 1.3971415284927192e-05, "loss": 1.4097, "step": 5828 }, { "epoch": 0.8349806617963043, "grad_norm": 1.1373724937438965, "learning_rate": 1.3947770409072014e-05, "loss": 1.352, "step": 5829 }, { "epoch": 0.8351239077496061, "grad_norm": 1.0326415300369263, "learning_rate": 1.3924144058132405e-05, "loss": 1.5714, "step": 5830 }, { "epoch": 0.8352671537029079, "grad_norm": 1.0562578439712524, "learning_rate": 1.3900536237194528e-05, "loss": 1.444, "step": 5831 }, { "epoch": 0.8354103996562097, "grad_norm": 0.9469882249832153, "learning_rate": 1.3876946951340598e-05, "loss": 1.5159, "step": 5832 }, { "epoch": 0.8355536456095115, "grad_norm": 1.2857955694198608, "learning_rate": 1.3853376205648727e-05, "loss": 1.2951, "step": 5833 }, { "epoch": 0.8356968915628133, "grad_norm": 1.136573314666748, "learning_rate": 1.3829824005193181e-05, "loss": 1.4218, "step": 5834 }, { "epoch": 0.8358401375161152, "grad_norm": 1.0780973434448242, "learning_rate": 1.3806290355044171e-05, "loss": 1.3424, "step": 5835 }, { "epoch": 0.835983383469417, "grad_norm": 1.01009202003479, "learning_rate": 1.3782775260267856e-05, "loss": 1.4121, "step": 5836 }, { "epoch": 0.8361266294227188, "grad_norm": 0.9678208827972412, "learning_rate": 1.3759278725926472e-05, "loss": 1.4011, "step": 5837 }, { "epoch": 0.8362698753760206, "grad_norm": 1.0487544536590576, "learning_rate": 1.3735800757078287e-05, "loss": 1.3903, "step": 5838 }, { "epoch": 0.8364131213293224, "grad_norm": 1.0286619663238525, "learning_rate": 1.3712341358777469e-05, "loss": 1.3095, "step": 5839 }, { "epoch": 0.8365563672826243, "grad_norm": 0.9621546864509583, "learning_rate": 1.3688900536074278e-05, "loss": 1.6125, "step": 5840 }, { "epoch": 0.8366996132359261, "grad_norm": 1.0732231140136719, "learning_rate": 1.3665478294014922e-05, "loss": 1.3224, "step": 5841 }, { "epoch": 0.8368428591892279, "grad_norm": 1.0345354080200195, "learning_rate": 1.3642074637641644e-05, "loss": 1.3836, "step": 5842 }, { "epoch": 0.8369861051425297, "grad_norm": 1.2423455715179443, "learning_rate": 1.3618689571992705e-05, "loss": 1.4345, "step": 5843 }, { "epoch": 0.8371293510958315, "grad_norm": 0.9972796440124512, "learning_rate": 1.359532310210223e-05, "loss": 1.4318, "step": 5844 }, { "epoch": 0.8372725970491334, "grad_norm": 1.119672417640686, "learning_rate": 1.3571975233000544e-05, "loss": 1.3708, "step": 5845 }, { "epoch": 0.8374158430024352, "grad_norm": 1.4175831079483032, "learning_rate": 1.3548645969713848e-05, "loss": 1.3858, "step": 5846 }, { "epoch": 0.837559088955737, "grad_norm": 1.1079649925231934, "learning_rate": 1.352533531726432e-05, "loss": 1.3008, "step": 5847 }, { "epoch": 0.8377023349090388, "grad_norm": 1.0787467956542969, "learning_rate": 1.3502043280670174e-05, "loss": 1.2124, "step": 5848 }, { "epoch": 0.8378455808623406, "grad_norm": 1.1681524515151978, "learning_rate": 1.3478769864945618e-05, "loss": 1.3824, "step": 5849 }, { "epoch": 0.8379888268156425, "grad_norm": 1.1513557434082031, "learning_rate": 1.3455515075100844e-05, "loss": 1.4161, "step": 5850 }, { "epoch": 0.8381320727689443, "grad_norm": 0.9557899832725525, "learning_rate": 1.3432278916142027e-05, "loss": 1.307, "step": 5851 }, { "epoch": 0.8382753187222461, "grad_norm": 1.0192763805389404, "learning_rate": 1.3409061393071343e-05, "loss": 1.3341, "step": 5852 }, { "epoch": 0.8384185646755479, "grad_norm": 1.173835277557373, "learning_rate": 1.3385862510886971e-05, "loss": 1.4787, "step": 5853 }, { "epoch": 0.8385618106288497, "grad_norm": 1.1230459213256836, "learning_rate": 1.3362682274583017e-05, "loss": 1.2567, "step": 5854 }, { "epoch": 0.8387050565821516, "grad_norm": 1.2405790090560913, "learning_rate": 1.3339520689149599e-05, "loss": 1.3182, "step": 5855 }, { "epoch": 0.8388483025354534, "grad_norm": 1.0970443487167358, "learning_rate": 1.3316377759572906e-05, "loss": 1.4368, "step": 5856 }, { "epoch": 0.8389915484887552, "grad_norm": 1.1253119707107544, "learning_rate": 1.3293253490834978e-05, "loss": 1.4241, "step": 5857 }, { "epoch": 0.839134794442057, "grad_norm": 1.1183799505233765, "learning_rate": 1.3270147887913909e-05, "loss": 1.4552, "step": 5858 }, { "epoch": 0.8392780403953588, "grad_norm": 1.2072148323059082, "learning_rate": 1.3247060955783775e-05, "loss": 1.3884, "step": 5859 }, { "epoch": 0.8394212863486606, "grad_norm": 1.349250316619873, "learning_rate": 1.322399269941461e-05, "loss": 1.4938, "step": 5860 }, { "epoch": 0.8395645323019625, "grad_norm": 1.1933029890060425, "learning_rate": 1.320094312377247e-05, "loss": 1.3254, "step": 5861 }, { "epoch": 0.8397077782552643, "grad_norm": 1.1124521493911743, "learning_rate": 1.3177912233819289e-05, "loss": 1.3491, "step": 5862 }, { "epoch": 0.839851024208566, "grad_norm": 1.0690639019012451, "learning_rate": 1.3154900034513106e-05, "loss": 1.5199, "step": 5863 }, { "epoch": 0.8399942701618679, "grad_norm": 0.9684329628944397, "learning_rate": 1.3131906530807891e-05, "loss": 1.5549, "step": 5864 }, { "epoch": 0.8401375161151697, "grad_norm": 1.0842161178588867, "learning_rate": 1.3108931727653529e-05, "loss": 1.2659, "step": 5865 }, { "epoch": 0.8402807620684716, "grad_norm": 1.289391279220581, "learning_rate": 1.3085975629995916e-05, "loss": 1.2047, "step": 5866 }, { "epoch": 0.8404240080217734, "grad_norm": 0.9037125706672668, "learning_rate": 1.3063038242777014e-05, "loss": 1.4522, "step": 5867 }, { "epoch": 0.8405672539750753, "grad_norm": 1.1377625465393066, "learning_rate": 1.3040119570934595e-05, "loss": 1.199, "step": 5868 }, { "epoch": 0.840710499928377, "grad_norm": 1.0364843606948853, "learning_rate": 1.3017219619402509e-05, "loss": 1.481, "step": 5869 }, { "epoch": 0.8408537458816788, "grad_norm": 1.1615277528762817, "learning_rate": 1.299433839311055e-05, "loss": 1.4197, "step": 5870 }, { "epoch": 0.8409969918349807, "grad_norm": 0.972763180732727, "learning_rate": 1.2971475896984475e-05, "loss": 1.4199, "step": 5871 }, { "epoch": 0.8411402377882825, "grad_norm": 0.9330835342407227, "learning_rate": 1.2948632135946048e-05, "loss": 1.4107, "step": 5872 }, { "epoch": 0.8412834837415843, "grad_norm": 0.9538688659667969, "learning_rate": 1.292580711491289e-05, "loss": 1.4094, "step": 5873 }, { "epoch": 0.8414267296948861, "grad_norm": 1.1188175678253174, "learning_rate": 1.2903000838798752e-05, "loss": 1.4263, "step": 5874 }, { "epoch": 0.8415699756481879, "grad_norm": 0.95613032579422, "learning_rate": 1.288021331251319e-05, "loss": 1.3848, "step": 5875 }, { "epoch": 0.8417132216014898, "grad_norm": 1.1459360122680664, "learning_rate": 1.2857444540961805e-05, "loss": 1.2825, "step": 5876 }, { "epoch": 0.8418564675547916, "grad_norm": 0.9626212120056152, "learning_rate": 1.2834694529046209e-05, "loss": 1.2882, "step": 5877 }, { "epoch": 0.8419997135080934, "grad_norm": 1.1761460304260254, "learning_rate": 1.2811963281663853e-05, "loss": 1.3349, "step": 5878 }, { "epoch": 0.8421429594613952, "grad_norm": 1.0111474990844727, "learning_rate": 1.278925080370823e-05, "loss": 1.5591, "step": 5879 }, { "epoch": 0.842286205414697, "grad_norm": 1.220505952835083, "learning_rate": 1.2766557100068787e-05, "loss": 1.2215, "step": 5880 }, { "epoch": 0.8424294513679988, "grad_norm": 1.0335100889205933, "learning_rate": 1.2743882175630905e-05, "loss": 1.5031, "step": 5881 }, { "epoch": 0.8425726973213007, "grad_norm": 1.2098928689956665, "learning_rate": 1.2721226035275957e-05, "loss": 1.3362, "step": 5882 }, { "epoch": 0.8427159432746025, "grad_norm": 1.0982825756072998, "learning_rate": 1.2698588683881186e-05, "loss": 1.3165, "step": 5883 }, { "epoch": 0.8428591892279043, "grad_norm": 1.0473443269729614, "learning_rate": 1.2675970126319914e-05, "loss": 1.4683, "step": 5884 }, { "epoch": 0.8430024351812061, "grad_norm": 1.0436902046203613, "learning_rate": 1.2653370367461359e-05, "loss": 1.4847, "step": 5885 }, { "epoch": 0.8431456811345079, "grad_norm": 1.2056255340576172, "learning_rate": 1.2630789412170663e-05, "loss": 1.266, "step": 5886 }, { "epoch": 0.8432889270878098, "grad_norm": 1.0287574529647827, "learning_rate": 1.2608227265308925e-05, "loss": 1.1773, "step": 5887 }, { "epoch": 0.8434321730411116, "grad_norm": 1.2423624992370605, "learning_rate": 1.2585683931733283e-05, "loss": 1.2254, "step": 5888 }, { "epoch": 0.8435754189944135, "grad_norm": 1.3301151990890503, "learning_rate": 1.256315941629671e-05, "loss": 1.4898, "step": 5889 }, { "epoch": 0.8437186649477152, "grad_norm": 1.149516224861145, "learning_rate": 1.2540653723848183e-05, "loss": 1.3118, "step": 5890 }, { "epoch": 0.843861910901017, "grad_norm": 1.135151982307434, "learning_rate": 1.2518166859232627e-05, "loss": 1.5089, "step": 5891 }, { "epoch": 0.8440051568543189, "grad_norm": 1.168349027633667, "learning_rate": 1.2495698827290902e-05, "loss": 1.2862, "step": 5892 }, { "epoch": 0.8441484028076207, "grad_norm": 1.0435901880264282, "learning_rate": 1.2473249632859862e-05, "loss": 1.3335, "step": 5893 }, { "epoch": 0.8442916487609226, "grad_norm": 1.2024805545806885, "learning_rate": 1.2450819280772164e-05, "loss": 1.5125, "step": 5894 }, { "epoch": 0.8444348947142243, "grad_norm": 1.0005171298980713, "learning_rate": 1.2428407775856598e-05, "loss": 1.4868, "step": 5895 }, { "epoch": 0.8445781406675261, "grad_norm": 0.9306319355964661, "learning_rate": 1.2406015122937809e-05, "loss": 1.4259, "step": 5896 }, { "epoch": 0.844721386620828, "grad_norm": 1.1300095319747925, "learning_rate": 1.2383641326836314e-05, "loss": 1.3294, "step": 5897 }, { "epoch": 0.8448646325741298, "grad_norm": 1.4026776552200317, "learning_rate": 1.2361286392368676e-05, "loss": 1.3565, "step": 5898 }, { "epoch": 0.8450078785274316, "grad_norm": 1.4191770553588867, "learning_rate": 1.2338950324347354e-05, "loss": 1.2097, "step": 5899 }, { "epoch": 0.8451511244807334, "grad_norm": 0.9725842475891113, "learning_rate": 1.2316633127580757e-05, "loss": 1.3567, "step": 5900 }, { "epoch": 0.8452943704340352, "grad_norm": 0.9735100865364075, "learning_rate": 1.2294334806873231e-05, "loss": 1.223, "step": 5901 }, { "epoch": 0.845437616387337, "grad_norm": 0.9836328029632568, "learning_rate": 1.2272055367025027e-05, "loss": 1.4428, "step": 5902 }, { "epoch": 0.8455808623406389, "grad_norm": 1.0228328704833984, "learning_rate": 1.2249794812832404e-05, "loss": 1.5348, "step": 5903 }, { "epoch": 0.8457241082939407, "grad_norm": 1.0916717052459717, "learning_rate": 1.2227553149087446e-05, "loss": 1.4367, "step": 5904 }, { "epoch": 0.8458673542472425, "grad_norm": 1.0190634727478027, "learning_rate": 1.2205330380578251e-05, "loss": 1.6492, "step": 5905 }, { "epoch": 0.8460106002005443, "grad_norm": 1.0167508125305176, "learning_rate": 1.2183126512088882e-05, "loss": 1.1656, "step": 5906 }, { "epoch": 0.8461538461538461, "grad_norm": 1.166505217552185, "learning_rate": 1.216094154839923e-05, "loss": 1.4668, "step": 5907 }, { "epoch": 0.846297092107148, "grad_norm": 1.1457016468048096, "learning_rate": 1.2138775494285182e-05, "loss": 1.432, "step": 5908 }, { "epoch": 0.8464403380604498, "grad_norm": 1.2262722253799438, "learning_rate": 1.2116628354518544e-05, "loss": 1.3973, "step": 5909 }, { "epoch": 0.8465835840137517, "grad_norm": 0.9604052901268005, "learning_rate": 1.2094500133867038e-05, "loss": 1.3984, "step": 5910 }, { "epoch": 0.8467268299670534, "grad_norm": 1.1697322130203247, "learning_rate": 1.207239083709435e-05, "loss": 1.1937, "step": 5911 }, { "epoch": 0.8468700759203552, "grad_norm": 0.9779345393180847, "learning_rate": 1.2050300468960007e-05, "loss": 1.3652, "step": 5912 }, { "epoch": 0.8470133218736571, "grad_norm": 1.23443603515625, "learning_rate": 1.2028229034219584e-05, "loss": 1.5366, "step": 5913 }, { "epoch": 0.8471565678269589, "grad_norm": 1.264336347579956, "learning_rate": 1.2006176537624491e-05, "loss": 1.3569, "step": 5914 }, { "epoch": 0.8472998137802608, "grad_norm": 1.0483895540237427, "learning_rate": 1.1984142983922075e-05, "loss": 1.316, "step": 5915 }, { "epoch": 0.8474430597335625, "grad_norm": 1.1203054189682007, "learning_rate": 1.1962128377855597e-05, "loss": 1.3518, "step": 5916 }, { "epoch": 0.8475863056868643, "grad_norm": 1.14857017993927, "learning_rate": 1.194013272416431e-05, "loss": 1.5212, "step": 5917 }, { "epoch": 0.8477295516401662, "grad_norm": 1.077927827835083, "learning_rate": 1.1918156027583294e-05, "loss": 1.3337, "step": 5918 }, { "epoch": 0.847872797593468, "grad_norm": 0.9853607416152954, "learning_rate": 1.1896198292843597e-05, "loss": 1.3714, "step": 5919 }, { "epoch": 0.8480160435467698, "grad_norm": 1.0781772136688232, "learning_rate": 1.187425952467217e-05, "loss": 1.5929, "step": 5920 }, { "epoch": 0.8481592895000716, "grad_norm": 1.2823963165283203, "learning_rate": 1.1852339727791906e-05, "loss": 1.52, "step": 5921 }, { "epoch": 0.8483025354533734, "grad_norm": 1.1340436935424805, "learning_rate": 1.1830438906921592e-05, "loss": 1.5589, "step": 5922 }, { "epoch": 0.8484457814066753, "grad_norm": 1.2404865026474, "learning_rate": 1.1808557066775883e-05, "loss": 1.3667, "step": 5923 }, { "epoch": 0.8485890273599771, "grad_norm": 0.9128482341766357, "learning_rate": 1.1786694212065463e-05, "loss": 1.2467, "step": 5924 }, { "epoch": 0.8487322733132789, "grad_norm": 1.0264840126037598, "learning_rate": 1.1764850347496859e-05, "loss": 1.3486, "step": 5925 }, { "epoch": 0.8488755192665807, "grad_norm": 1.2550785541534424, "learning_rate": 1.1743025477772474e-05, "loss": 1.4195, "step": 5926 }, { "epoch": 0.8490187652198825, "grad_norm": 1.2878550291061401, "learning_rate": 1.1721219607590683e-05, "loss": 1.3839, "step": 5927 }, { "epoch": 0.8491620111731844, "grad_norm": 1.253336787223816, "learning_rate": 1.1699432741645754e-05, "loss": 1.3767, "step": 5928 }, { "epoch": 0.8493052571264862, "grad_norm": 0.9495038986206055, "learning_rate": 1.1677664884627847e-05, "loss": 1.3349, "step": 5929 }, { "epoch": 0.849448503079788, "grad_norm": 1.0434354543685913, "learning_rate": 1.1655916041223059e-05, "loss": 1.3824, "step": 5930 }, { "epoch": 0.8495917490330898, "grad_norm": 1.0651259422302246, "learning_rate": 1.1634186216113363e-05, "loss": 1.1771, "step": 5931 }, { "epoch": 0.8497349949863916, "grad_norm": 1.1817960739135742, "learning_rate": 1.1612475413976686e-05, "loss": 1.1969, "step": 5932 }, { "epoch": 0.8498782409396934, "grad_norm": 1.1750916242599487, "learning_rate": 1.159078363948678e-05, "loss": 1.4054, "step": 5933 }, { "epoch": 0.8500214868929953, "grad_norm": 1.1518166065216064, "learning_rate": 1.1569110897313351e-05, "loss": 1.5648, "step": 5934 }, { "epoch": 0.8501647328462971, "grad_norm": 1.1537971496582031, "learning_rate": 1.1547457192122058e-05, "loss": 1.4468, "step": 5935 }, { "epoch": 0.850307978799599, "grad_norm": 0.9919856786727905, "learning_rate": 1.1525822528574349e-05, "loss": 1.2949, "step": 5936 }, { "epoch": 0.8504512247529007, "grad_norm": 1.0180913209915161, "learning_rate": 1.1504206911327653e-05, "loss": 1.4565, "step": 5937 }, { "epoch": 0.8505944707062025, "grad_norm": 0.9992722272872925, "learning_rate": 1.1482610345035283e-05, "loss": 1.5096, "step": 5938 }, { "epoch": 0.8507377166595044, "grad_norm": 1.1832038164138794, "learning_rate": 1.146103283434643e-05, "loss": 1.3783, "step": 5939 }, { "epoch": 0.8508809626128062, "grad_norm": 1.0289227962493896, "learning_rate": 1.1439474383906223e-05, "loss": 1.5543, "step": 5940 }, { "epoch": 0.851024208566108, "grad_norm": 1.0689349174499512, "learning_rate": 1.141793499835564e-05, "loss": 1.3577, "step": 5941 }, { "epoch": 0.8511674545194098, "grad_norm": 1.068604826927185, "learning_rate": 1.1396414682331592e-05, "loss": 1.5291, "step": 5942 }, { "epoch": 0.8513107004727116, "grad_norm": 1.0408991575241089, "learning_rate": 1.1374913440466872e-05, "loss": 1.2585, "step": 5943 }, { "epoch": 0.8514539464260135, "grad_norm": 1.0943585634231567, "learning_rate": 1.1353431277390126e-05, "loss": 1.3339, "step": 5944 }, { "epoch": 0.8515971923793153, "grad_norm": 1.0280113220214844, "learning_rate": 1.1331968197725984e-05, "loss": 1.4563, "step": 5945 }, { "epoch": 0.8517404383326171, "grad_norm": 1.4389575719833374, "learning_rate": 1.1310524206094919e-05, "loss": 1.5263, "step": 5946 }, { "epoch": 0.8518836842859189, "grad_norm": 1.0397403240203857, "learning_rate": 1.128909930711325e-05, "loss": 1.4598, "step": 5947 }, { "epoch": 0.8520269302392207, "grad_norm": 0.9412740468978882, "learning_rate": 1.126769350539324e-05, "loss": 1.7607, "step": 5948 }, { "epoch": 0.8521701761925226, "grad_norm": 0.9969056844711304, "learning_rate": 1.1246306805543038e-05, "loss": 1.3967, "step": 5949 }, { "epoch": 0.8523134221458244, "grad_norm": 1.126189947128296, "learning_rate": 1.1224939212166662e-05, "loss": 1.4216, "step": 5950 }, { "epoch": 0.8524566680991262, "grad_norm": 1.1168287992477417, "learning_rate": 1.1203590729864033e-05, "loss": 1.4634, "step": 5951 }, { "epoch": 0.852599914052428, "grad_norm": 1.0573464632034302, "learning_rate": 1.1182261363230962e-05, "loss": 1.4072, "step": 5952 }, { "epoch": 0.8527431600057298, "grad_norm": 1.0440900325775146, "learning_rate": 1.116095111685913e-05, "loss": 1.2846, "step": 5953 }, { "epoch": 0.8528864059590316, "grad_norm": 1.2952592372894287, "learning_rate": 1.1139659995336082e-05, "loss": 1.3101, "step": 5954 }, { "epoch": 0.8530296519123335, "grad_norm": 1.1916544437408447, "learning_rate": 1.1118388003245262e-05, "loss": 1.2321, "step": 5955 }, { "epoch": 0.8531728978656353, "grad_norm": 1.1573394536972046, "learning_rate": 1.1097135145166072e-05, "loss": 1.3454, "step": 5956 }, { "epoch": 0.8533161438189372, "grad_norm": 1.0574326515197754, "learning_rate": 1.1075901425673651e-05, "loss": 1.5053, "step": 5957 }, { "epoch": 0.8534593897722389, "grad_norm": 1.1780495643615723, "learning_rate": 1.1054686849339124e-05, "loss": 1.4924, "step": 5958 }, { "epoch": 0.8536026357255407, "grad_norm": 1.141882061958313, "learning_rate": 1.103349142072947e-05, "loss": 1.501, "step": 5959 }, { "epoch": 0.8537458816788426, "grad_norm": 0.8827534317970276, "learning_rate": 1.1012315144407538e-05, "loss": 1.4229, "step": 5960 }, { "epoch": 0.8538891276321444, "grad_norm": 1.001202940940857, "learning_rate": 1.0991158024932068e-05, "loss": 1.201, "step": 5961 }, { "epoch": 0.8540323735854463, "grad_norm": 1.0827302932739258, "learning_rate": 1.0970020066857601e-05, "loss": 1.4203, "step": 5962 }, { "epoch": 0.854175619538748, "grad_norm": 1.2331846952438354, "learning_rate": 1.0948901274734691e-05, "loss": 1.3337, "step": 5963 }, { "epoch": 0.8543188654920498, "grad_norm": 1.075698971748352, "learning_rate": 1.0927801653109682e-05, "loss": 1.3747, "step": 5964 }, { "epoch": 0.8544621114453517, "grad_norm": 1.0469216108322144, "learning_rate": 1.0906721206524762e-05, "loss": 1.1162, "step": 5965 }, { "epoch": 0.8546053573986535, "grad_norm": 1.0299696922302246, "learning_rate": 1.0885659939518033e-05, "loss": 1.4588, "step": 5966 }, { "epoch": 0.8547486033519553, "grad_norm": 0.8674678206443787, "learning_rate": 1.0864617856623516e-05, "loss": 1.4715, "step": 5967 }, { "epoch": 0.8548918493052571, "grad_norm": 0.99725741147995, "learning_rate": 1.0843594962371007e-05, "loss": 1.5728, "step": 5968 }, { "epoch": 0.8550350952585589, "grad_norm": 1.1514919996261597, "learning_rate": 1.0822591261286207e-05, "loss": 1.3922, "step": 5969 }, { "epoch": 0.8551783412118608, "grad_norm": 1.228461503982544, "learning_rate": 1.0801606757890725e-05, "loss": 1.5458, "step": 5970 }, { "epoch": 0.8553215871651626, "grad_norm": 1.1286852359771729, "learning_rate": 1.078064145670199e-05, "loss": 1.4287, "step": 5971 }, { "epoch": 0.8554648331184644, "grad_norm": 1.0499244928359985, "learning_rate": 1.0759695362233324e-05, "loss": 1.1998, "step": 5972 }, { "epoch": 0.8556080790717662, "grad_norm": 1.0609266757965088, "learning_rate": 1.0738768478993854e-05, "loss": 1.3644, "step": 5973 }, { "epoch": 0.855751325025068, "grad_norm": 1.0579861402511597, "learning_rate": 1.0717860811488689e-05, "loss": 1.4787, "step": 5974 }, { "epoch": 0.8558945709783699, "grad_norm": 1.088641881942749, "learning_rate": 1.0696972364218705e-05, "loss": 1.4341, "step": 5975 }, { "epoch": 0.8560378169316717, "grad_norm": 1.151374101638794, "learning_rate": 1.0676103141680649e-05, "loss": 1.4171, "step": 5976 }, { "epoch": 0.8561810628849735, "grad_norm": 1.2851057052612305, "learning_rate": 1.0655253148367173e-05, "loss": 1.4876, "step": 5977 }, { "epoch": 0.8563243088382754, "grad_norm": 1.0351163148880005, "learning_rate": 1.0634422388766752e-05, "loss": 1.3716, "step": 5978 }, { "epoch": 0.8564675547915771, "grad_norm": 1.0411376953125, "learning_rate": 1.0613610867363722e-05, "loss": 1.3855, "step": 5979 }, { "epoch": 0.8566108007448789, "grad_norm": 0.9885153770446777, "learning_rate": 1.059281858863832e-05, "loss": 1.1327, "step": 5980 }, { "epoch": 0.8567540466981808, "grad_norm": 1.0979875326156616, "learning_rate": 1.0572045557066579e-05, "loss": 1.315, "step": 5981 }, { "epoch": 0.8568972926514826, "grad_norm": 1.2584151029586792, "learning_rate": 1.0551291777120464e-05, "loss": 1.3016, "step": 5982 }, { "epoch": 0.8570405386047845, "grad_norm": 1.2990772724151611, "learning_rate": 1.053055725326768e-05, "loss": 1.315, "step": 5983 }, { "epoch": 0.8571837845580862, "grad_norm": 0.9196381568908691, "learning_rate": 1.0509841989971891e-05, "loss": 1.5631, "step": 5984 }, { "epoch": 0.857327030511388, "grad_norm": 1.2640011310577393, "learning_rate": 1.0489145991692618e-05, "loss": 1.496, "step": 5985 }, { "epoch": 0.8574702764646899, "grad_norm": 1.1008282899856567, "learning_rate": 1.0468469262885139e-05, "loss": 1.3345, "step": 5986 }, { "epoch": 0.8576135224179917, "grad_norm": 0.9982852339744568, "learning_rate": 1.0447811808000673e-05, "loss": 1.5105, "step": 5987 }, { "epoch": 0.8577567683712936, "grad_norm": 0.9490407705307007, "learning_rate": 1.0427173631486254e-05, "loss": 1.218, "step": 5988 }, { "epoch": 0.8579000143245953, "grad_norm": 1.120341181755066, "learning_rate": 1.0406554737784757e-05, "loss": 1.5099, "step": 5989 }, { "epoch": 0.8580432602778971, "grad_norm": 0.9381440281867981, "learning_rate": 1.0385955131334958e-05, "loss": 1.4984, "step": 5990 }, { "epoch": 0.858186506231199, "grad_norm": 0.8875547647476196, "learning_rate": 1.0365374816571382e-05, "loss": 1.3073, "step": 5991 }, { "epoch": 0.8583297521845008, "grad_norm": 1.1631871461868286, "learning_rate": 1.0344813797924501e-05, "loss": 1.3985, "step": 5992 }, { "epoch": 0.8584729981378026, "grad_norm": 1.0492743253707886, "learning_rate": 1.0324272079820618e-05, "loss": 1.173, "step": 5993 }, { "epoch": 0.8586162440911044, "grad_norm": 1.0785988569259644, "learning_rate": 1.0303749666681806e-05, "loss": 1.3367, "step": 5994 }, { "epoch": 0.8587594900444062, "grad_norm": 1.0852069854736328, "learning_rate": 1.0283246562926041e-05, "loss": 1.2261, "step": 5995 }, { "epoch": 0.8589027359977081, "grad_norm": 1.0551471710205078, "learning_rate": 1.0262762772967172e-05, "loss": 1.4964, "step": 5996 }, { "epoch": 0.8590459819510099, "grad_norm": 1.0236765146255493, "learning_rate": 1.0242298301214814e-05, "loss": 1.342, "step": 5997 }, { "epoch": 0.8591892279043117, "grad_norm": 1.1315295696258545, "learning_rate": 1.0221853152074468e-05, "loss": 1.468, "step": 5998 }, { "epoch": 0.8593324738576136, "grad_norm": 1.445736050605774, "learning_rate": 1.0201427329947477e-05, "loss": 1.2237, "step": 5999 }, { "epoch": 0.8594757198109153, "grad_norm": 1.1562144756317139, "learning_rate": 1.0181020839231014e-05, "loss": 1.2791, "step": 6000 }, { "epoch": 0.8596189657642171, "grad_norm": 1.061842441558838, "learning_rate": 1.0160633684318088e-05, "loss": 1.3866, "step": 6001 }, { "epoch": 0.859762211717519, "grad_norm": 1.0045994520187378, "learning_rate": 1.0140265869597554e-05, "loss": 1.3542, "step": 6002 }, { "epoch": 0.8599054576708208, "grad_norm": 1.0613834857940674, "learning_rate": 1.0119917399454105e-05, "loss": 1.5329, "step": 6003 }, { "epoch": 0.8600487036241227, "grad_norm": 1.2301291227340698, "learning_rate": 1.0099588278268246e-05, "loss": 1.4949, "step": 6004 }, { "epoch": 0.8601919495774244, "grad_norm": 0.9867377281188965, "learning_rate": 1.0079278510416313e-05, "loss": 1.3574, "step": 6005 }, { "epoch": 0.8603351955307262, "grad_norm": 1.4309240579605103, "learning_rate": 1.0058988100270561e-05, "loss": 1.3548, "step": 6006 }, { "epoch": 0.8604784414840281, "grad_norm": 1.0366567373275757, "learning_rate": 1.0038717052198965e-05, "loss": 1.3271, "step": 6007 }, { "epoch": 0.8606216874373299, "grad_norm": 1.2306365966796875, "learning_rate": 1.0018465370565388e-05, "loss": 1.3101, "step": 6008 }, { "epoch": 0.8607649333906318, "grad_norm": 1.1581447124481201, "learning_rate": 9.998233059729523e-06, "loss": 1.3947, "step": 6009 }, { "epoch": 0.8609081793439335, "grad_norm": 1.0660598278045654, "learning_rate": 9.978020124046882e-06, "loss": 1.3289, "step": 6010 }, { "epoch": 0.8610514252972353, "grad_norm": 1.0561665296554565, "learning_rate": 9.957826567868822e-06, "loss": 1.6065, "step": 6011 }, { "epoch": 0.8611946712505372, "grad_norm": 1.0173041820526123, "learning_rate": 9.937652395542474e-06, "loss": 1.4442, "step": 6012 }, { "epoch": 0.861337917203839, "grad_norm": 1.059556245803833, "learning_rate": 9.917497611410875e-06, "loss": 1.4359, "step": 6013 }, { "epoch": 0.8614811631571409, "grad_norm": 1.2258607149124146, "learning_rate": 9.897362219812879e-06, "loss": 1.3255, "step": 6014 }, { "epoch": 0.8616244091104426, "grad_norm": 0.9293045997619629, "learning_rate": 9.877246225083082e-06, "loss": 1.4518, "step": 6015 }, { "epoch": 0.8617676550637444, "grad_norm": 1.060230016708374, "learning_rate": 9.857149631551955e-06, "loss": 1.517, "step": 6016 }, { "epoch": 0.8619109010170463, "grad_norm": 1.045097827911377, "learning_rate": 9.837072443545858e-06, "loss": 1.3968, "step": 6017 }, { "epoch": 0.8620541469703481, "grad_norm": 0.9710952639579773, "learning_rate": 9.81701466538687e-06, "loss": 1.3088, "step": 6018 }, { "epoch": 0.8621973929236499, "grad_norm": 0.9889569878578186, "learning_rate": 9.796976301392934e-06, "loss": 1.6315, "step": 6019 }, { "epoch": 0.8623406388769517, "grad_norm": 1.4043844938278198, "learning_rate": 9.776957355877825e-06, "loss": 1.4982, "step": 6020 }, { "epoch": 0.8624838848302535, "grad_norm": 1.0389546155929565, "learning_rate": 9.756957833151115e-06, "loss": 1.4787, "step": 6021 }, { "epoch": 0.8626271307835554, "grad_norm": 1.059665322303772, "learning_rate": 9.736977737518249e-06, "loss": 1.2921, "step": 6022 }, { "epoch": 0.8627703767368572, "grad_norm": 1.1262787580490112, "learning_rate": 9.717017073280366e-06, "loss": 1.4134, "step": 6023 }, { "epoch": 0.862913622690159, "grad_norm": 1.1173547506332397, "learning_rate": 9.697075844734571e-06, "loss": 1.2979, "step": 6024 }, { "epoch": 0.8630568686434609, "grad_norm": 1.2087897062301636, "learning_rate": 9.67715405617371e-06, "loss": 1.2733, "step": 6025 }, { "epoch": 0.8632001145967626, "grad_norm": 1.0024877786636353, "learning_rate": 9.657251711886427e-06, "loss": 1.3509, "step": 6026 }, { "epoch": 0.8633433605500644, "grad_norm": 0.9519560933113098, "learning_rate": 9.63736881615721e-06, "loss": 1.151, "step": 6027 }, { "epoch": 0.8634866065033663, "grad_norm": 1.2101024389266968, "learning_rate": 9.617505373266355e-06, "loss": 1.317, "step": 6028 }, { "epoch": 0.8636298524566681, "grad_norm": 1.1807539463043213, "learning_rate": 9.597661387489987e-06, "loss": 1.5106, "step": 6029 }, { "epoch": 0.86377309840997, "grad_norm": 1.0933598279953003, "learning_rate": 9.577836863099999e-06, "loss": 1.5389, "step": 6030 }, { "epoch": 0.8639163443632717, "grad_norm": 1.077688217163086, "learning_rate": 9.558031804364142e-06, "loss": 1.5173, "step": 6031 }, { "epoch": 0.8640595903165735, "grad_norm": 1.1793551445007324, "learning_rate": 9.538246215545953e-06, "loss": 1.4578, "step": 6032 }, { "epoch": 0.8642028362698754, "grad_norm": 1.1994574069976807, "learning_rate": 9.518480100904769e-06, "loss": 1.3562, "step": 6033 }, { "epoch": 0.8643460822231772, "grad_norm": 1.1399985551834106, "learning_rate": 9.498733464695719e-06, "loss": 1.3796, "step": 6034 }, { "epoch": 0.864489328176479, "grad_norm": 1.0083770751953125, "learning_rate": 9.479006311169835e-06, "loss": 1.2143, "step": 6035 }, { "epoch": 0.8646325741297808, "grad_norm": 1.0196020603179932, "learning_rate": 9.459298644573833e-06, "loss": 1.4158, "step": 6036 }, { "epoch": 0.8647758200830826, "grad_norm": 0.9705145359039307, "learning_rate": 9.439610469150295e-06, "loss": 1.3836, "step": 6037 }, { "epoch": 0.8649190660363845, "grad_norm": 0.9643406867980957, "learning_rate": 9.419941789137598e-06, "loss": 1.4293, "step": 6038 }, { "epoch": 0.8650623119896863, "grad_norm": 1.2338895797729492, "learning_rate": 9.400292608769933e-06, "loss": 1.3262, "step": 6039 }, { "epoch": 0.8652055579429881, "grad_norm": 1.0757371187210083, "learning_rate": 9.380662932277285e-06, "loss": 1.4438, "step": 6040 }, { "epoch": 0.8653488038962899, "grad_norm": 1.120244026184082, "learning_rate": 9.361052763885392e-06, "loss": 1.2985, "step": 6041 }, { "epoch": 0.8654920498495917, "grad_norm": 1.031241536140442, "learning_rate": 9.341462107815891e-06, "loss": 1.4219, "step": 6042 }, { "epoch": 0.8656352958028936, "grad_norm": 1.0601811408996582, "learning_rate": 9.321890968286173e-06, "loss": 1.2912, "step": 6043 }, { "epoch": 0.8657785417561954, "grad_norm": 1.145644187927246, "learning_rate": 9.302339349509371e-06, "loss": 1.3257, "step": 6044 }, { "epoch": 0.8659217877094972, "grad_norm": 1.062959909439087, "learning_rate": 9.28280725569447e-06, "loss": 1.4619, "step": 6045 }, { "epoch": 0.8660650336627991, "grad_norm": 1.0325846672058105, "learning_rate": 9.263294691046297e-06, "loss": 1.3487, "step": 6046 }, { "epoch": 0.8662082796161008, "grad_norm": 1.0434093475341797, "learning_rate": 9.243801659765383e-06, "loss": 1.4288, "step": 6047 }, { "epoch": 0.8663515255694026, "grad_norm": 0.9925659894943237, "learning_rate": 9.224328166048101e-06, "loss": 1.4375, "step": 6048 }, { "epoch": 0.8664947715227045, "grad_norm": 1.2259210348129272, "learning_rate": 9.204874214086611e-06, "loss": 1.4381, "step": 6049 }, { "epoch": 0.8666380174760063, "grad_norm": 0.8642308712005615, "learning_rate": 9.185439808068885e-06, "loss": 1.3681, "step": 6050 }, { "epoch": 0.8667812634293082, "grad_norm": 0.990006148815155, "learning_rate": 9.166024952178665e-06, "loss": 1.3661, "step": 6051 }, { "epoch": 0.8669245093826099, "grad_norm": 1.1940925121307373, "learning_rate": 9.14662965059544e-06, "loss": 1.5103, "step": 6052 }, { "epoch": 0.8670677553359117, "grad_norm": 1.0651370286941528, "learning_rate": 9.127253907494604e-06, "loss": 1.3593, "step": 6053 }, { "epoch": 0.8672110012892136, "grad_norm": 1.1549527645111084, "learning_rate": 9.107897727047276e-06, "loss": 1.2538, "step": 6054 }, { "epoch": 0.8673542472425154, "grad_norm": 1.012184739112854, "learning_rate": 9.088561113420302e-06, "loss": 1.4339, "step": 6055 }, { "epoch": 0.8674974931958173, "grad_norm": 1.136501669883728, "learning_rate": 9.069244070776428e-06, "loss": 1.49, "step": 6056 }, { "epoch": 0.867640739149119, "grad_norm": 1.0919582843780518, "learning_rate": 9.049946603274106e-06, "loss": 1.4035, "step": 6057 }, { "epoch": 0.8677839851024208, "grad_norm": 1.2973735332489014, "learning_rate": 9.030668715067625e-06, "loss": 1.3846, "step": 6058 }, { "epoch": 0.8679272310557227, "grad_norm": 0.9568747282028198, "learning_rate": 9.011410410307019e-06, "loss": 1.4862, "step": 6059 }, { "epoch": 0.8680704770090245, "grad_norm": 1.1177923679351807, "learning_rate": 8.99217169313814e-06, "loss": 1.4924, "step": 6060 }, { "epoch": 0.8682137229623264, "grad_norm": 1.0631240606307983, "learning_rate": 8.972952567702619e-06, "loss": 1.5309, "step": 6061 }, { "epoch": 0.8683569689156281, "grad_norm": 0.8184772729873657, "learning_rate": 8.953753038137825e-06, "loss": 1.4991, "step": 6062 }, { "epoch": 0.8685002148689299, "grad_norm": 0.9700676202774048, "learning_rate": 8.934573108576937e-06, "loss": 1.4232, "step": 6063 }, { "epoch": 0.8686434608222318, "grad_norm": 1.1821479797363281, "learning_rate": 8.915412783148969e-06, "loss": 1.4882, "step": 6064 }, { "epoch": 0.8687867067755336, "grad_norm": 1.0869390964508057, "learning_rate": 8.896272065978628e-06, "loss": 1.3837, "step": 6065 }, { "epoch": 0.8689299527288354, "grad_norm": 1.1160736083984375, "learning_rate": 8.87715096118642e-06, "loss": 1.4212, "step": 6066 }, { "epoch": 0.8690731986821373, "grad_norm": 1.1133400201797485, "learning_rate": 8.858049472888707e-06, "loss": 1.3743, "step": 6067 }, { "epoch": 0.869216444635439, "grad_norm": 1.0873199701309204, "learning_rate": 8.838967605197512e-06, "loss": 1.3837, "step": 6068 }, { "epoch": 0.8693596905887409, "grad_norm": 1.088273525238037, "learning_rate": 8.819905362220704e-06, "loss": 1.4743, "step": 6069 }, { "epoch": 0.8695029365420427, "grad_norm": 1.0866031646728516, "learning_rate": 8.800862748061911e-06, "loss": 1.4741, "step": 6070 }, { "epoch": 0.8696461824953445, "grad_norm": 1.2942227125167847, "learning_rate": 8.781839766820543e-06, "loss": 1.4369, "step": 6071 }, { "epoch": 0.8697894284486464, "grad_norm": 1.0024195909500122, "learning_rate": 8.762836422591802e-06, "loss": 1.3551, "step": 6072 }, { "epoch": 0.8699326744019481, "grad_norm": 1.078922152519226, "learning_rate": 8.743852719466561e-06, "loss": 1.2113, "step": 6073 }, { "epoch": 0.8700759203552499, "grad_norm": 1.08243727684021, "learning_rate": 8.724888661531616e-06, "loss": 1.3558, "step": 6074 }, { "epoch": 0.8702191663085518, "grad_norm": 1.016839861869812, "learning_rate": 8.705944252869446e-06, "loss": 1.4482, "step": 6075 }, { "epoch": 0.8703624122618536, "grad_norm": 1.0688939094543457, "learning_rate": 8.68701949755829e-06, "loss": 1.3105, "step": 6076 }, { "epoch": 0.8705056582151555, "grad_norm": 1.0628862380981445, "learning_rate": 8.668114399672179e-06, "loss": 1.3064, "step": 6077 }, { "epoch": 0.8706489041684572, "grad_norm": 1.0293161869049072, "learning_rate": 8.649228963280931e-06, "loss": 1.3684, "step": 6078 }, { "epoch": 0.870792150121759, "grad_norm": 1.1159167289733887, "learning_rate": 8.630363192450109e-06, "loss": 1.305, "step": 6079 }, { "epoch": 0.8709353960750609, "grad_norm": 1.0850244760513306, "learning_rate": 8.611517091241051e-06, "loss": 1.5661, "step": 6080 }, { "epoch": 0.8710786420283627, "grad_norm": 1.0197510719299316, "learning_rate": 8.592690663710834e-06, "loss": 1.3131, "step": 6081 }, { "epoch": 0.8712218879816646, "grad_norm": 1.0595096349716187, "learning_rate": 8.573883913912361e-06, "loss": 1.2878, "step": 6082 }, { "epoch": 0.8713651339349663, "grad_norm": 1.305173635482788, "learning_rate": 8.555096845894228e-06, "loss": 1.4644, "step": 6083 }, { "epoch": 0.8715083798882681, "grad_norm": 1.0326265096664429, "learning_rate": 8.53632946370081e-06, "loss": 1.4134, "step": 6084 }, { "epoch": 0.87165162584157, "grad_norm": 1.087653398513794, "learning_rate": 8.517581771372318e-06, "loss": 1.1652, "step": 6085 }, { "epoch": 0.8717948717948718, "grad_norm": 0.9684380888938904, "learning_rate": 8.498853772944614e-06, "loss": 1.571, "step": 6086 }, { "epoch": 0.8719381177481736, "grad_norm": 1.153364658355713, "learning_rate": 8.480145472449386e-06, "loss": 1.2698, "step": 6087 }, { "epoch": 0.8720813637014755, "grad_norm": 1.1119121313095093, "learning_rate": 8.461456873914087e-06, "loss": 1.5464, "step": 6088 }, { "epoch": 0.8722246096547772, "grad_norm": 1.0873825550079346, "learning_rate": 8.442787981361877e-06, "loss": 1.3236, "step": 6089 }, { "epoch": 0.8723678556080791, "grad_norm": 1.0571669340133667, "learning_rate": 8.424138798811753e-06, "loss": 1.344, "step": 6090 }, { "epoch": 0.8725111015613809, "grad_norm": 1.2416207790374756, "learning_rate": 8.40550933027836e-06, "loss": 1.1794, "step": 6091 }, { "epoch": 0.8726543475146827, "grad_norm": 1.1220674514770508, "learning_rate": 8.386899579772223e-06, "loss": 1.3089, "step": 6092 }, { "epoch": 0.8727975934679846, "grad_norm": 1.4793025255203247, "learning_rate": 8.368309551299536e-06, "loss": 1.3771, "step": 6093 }, { "epoch": 0.8729408394212863, "grad_norm": 1.256934404373169, "learning_rate": 8.349739248862265e-06, "loss": 1.3173, "step": 6094 }, { "epoch": 0.8730840853745881, "grad_norm": 1.1983052492141724, "learning_rate": 8.331188676458113e-06, "loss": 1.4946, "step": 6095 }, { "epoch": 0.87322733132789, "grad_norm": 1.003615379333496, "learning_rate": 8.312657838080629e-06, "loss": 1.4727, "step": 6096 }, { "epoch": 0.8733705772811918, "grad_norm": 0.9718660116195679, "learning_rate": 8.294146737718988e-06, "loss": 1.3032, "step": 6097 }, { "epoch": 0.8735138232344937, "grad_norm": 1.0460987091064453, "learning_rate": 8.275655379358183e-06, "loss": 1.4446, "step": 6098 }, { "epoch": 0.8736570691877954, "grad_norm": 1.1340272426605225, "learning_rate": 8.257183766978948e-06, "loss": 1.5376, "step": 6099 }, { "epoch": 0.8738003151410972, "grad_norm": 1.082486867904663, "learning_rate": 8.238731904557773e-06, "loss": 1.2932, "step": 6100 }, { "epoch": 0.8739435610943991, "grad_norm": 1.0665994882583618, "learning_rate": 8.220299796066899e-06, "loss": 1.2606, "step": 6101 }, { "epoch": 0.8740868070477009, "grad_norm": 1.072098970413208, "learning_rate": 8.201887445474244e-06, "loss": 1.2838, "step": 6102 }, { "epoch": 0.8742300530010028, "grad_norm": 1.0588206052780151, "learning_rate": 8.18349485674359e-06, "loss": 1.4828, "step": 6103 }, { "epoch": 0.8743732989543045, "grad_norm": 1.1506743431091309, "learning_rate": 8.165122033834393e-06, "loss": 1.4904, "step": 6104 }, { "epoch": 0.8745165449076063, "grad_norm": 1.2753790616989136, "learning_rate": 8.146768980701858e-06, "loss": 1.304, "step": 6105 }, { "epoch": 0.8746597908609082, "grad_norm": 1.1703521013259888, "learning_rate": 8.128435701296943e-06, "loss": 1.3782, "step": 6106 }, { "epoch": 0.87480303681421, "grad_norm": 1.530335783958435, "learning_rate": 8.110122199566362e-06, "loss": 1.2855, "step": 6107 }, { "epoch": 0.8749462827675119, "grad_norm": 1.3372070789337158, "learning_rate": 8.091828479452535e-06, "loss": 1.4336, "step": 6108 }, { "epoch": 0.8750895287208137, "grad_norm": 1.0343393087387085, "learning_rate": 8.073554544893668e-06, "loss": 1.3849, "step": 6109 }, { "epoch": 0.8752327746741154, "grad_norm": 1.2149019241333008, "learning_rate": 8.055300399823684e-06, "loss": 1.342, "step": 6110 }, { "epoch": 0.8753760206274173, "grad_norm": 1.1708725690841675, "learning_rate": 8.03706604817226e-06, "loss": 1.2752, "step": 6111 }, { "epoch": 0.8755192665807191, "grad_norm": 0.9658020734786987, "learning_rate": 8.018851493864777e-06, "loss": 1.6042, "step": 6112 }, { "epoch": 0.8756625125340209, "grad_norm": 1.0494414567947388, "learning_rate": 8.000656740822365e-06, "loss": 1.2579, "step": 6113 }, { "epoch": 0.8758057584873228, "grad_norm": 1.2507928609848022, "learning_rate": 7.98248179296196e-06, "loss": 1.3525, "step": 6114 }, { "epoch": 0.8759490044406245, "grad_norm": 1.2697725296020508, "learning_rate": 7.964326654196119e-06, "loss": 1.358, "step": 6115 }, { "epoch": 0.8760922503939264, "grad_norm": 1.1196260452270508, "learning_rate": 7.946191328433228e-06, "loss": 1.374, "step": 6116 }, { "epoch": 0.8762354963472282, "grad_norm": 1.3061590194702148, "learning_rate": 7.928075819577375e-06, "loss": 1.3393, "step": 6117 }, { "epoch": 0.87637874230053, "grad_norm": 1.0822381973266602, "learning_rate": 7.909980131528361e-06, "loss": 1.4585, "step": 6118 }, { "epoch": 0.8765219882538319, "grad_norm": 1.1793339252471924, "learning_rate": 7.891904268181772e-06, "loss": 1.2471, "step": 6119 }, { "epoch": 0.8766652342071336, "grad_norm": 1.130366325378418, "learning_rate": 7.873848233428826e-06, "loss": 1.3817, "step": 6120 }, { "epoch": 0.8768084801604354, "grad_norm": 0.9909486174583435, "learning_rate": 7.855812031156618e-06, "loss": 1.4232, "step": 6121 }, { "epoch": 0.8769517261137373, "grad_norm": 1.0860902070999146, "learning_rate": 7.837795665247882e-06, "loss": 1.4136, "step": 6122 }, { "epoch": 0.8770949720670391, "grad_norm": 1.145939588546753, "learning_rate": 7.819799139581051e-06, "loss": 1.5638, "step": 6123 }, { "epoch": 0.877238218020341, "grad_norm": 1.0899732112884521, "learning_rate": 7.80182245803035e-06, "loss": 1.2457, "step": 6124 }, { "epoch": 0.8773814639736427, "grad_norm": 1.195052981376648, "learning_rate": 7.783865624465758e-06, "loss": 1.4705, "step": 6125 }, { "epoch": 0.8775247099269445, "grad_norm": 1.0247740745544434, "learning_rate": 7.765928642752884e-06, "loss": 1.5351, "step": 6126 }, { "epoch": 0.8776679558802464, "grad_norm": 0.9951050281524658, "learning_rate": 7.74801151675314e-06, "loss": 1.3996, "step": 6127 }, { "epoch": 0.8778112018335482, "grad_norm": 0.9022167921066284, "learning_rate": 7.730114250323627e-06, "loss": 1.4954, "step": 6128 }, { "epoch": 0.8779544477868501, "grad_norm": 0.9744040369987488, "learning_rate": 7.712236847317188e-06, "loss": 1.4274, "step": 6129 }, { "epoch": 0.8780976937401518, "grad_norm": 1.1184483766555786, "learning_rate": 7.6943793115824e-06, "loss": 1.3383, "step": 6130 }, { "epoch": 0.8782409396934536, "grad_norm": 1.1000092029571533, "learning_rate": 7.67654164696352e-06, "loss": 1.5549, "step": 6131 }, { "epoch": 0.8783841856467555, "grad_norm": 1.0265151262283325, "learning_rate": 7.658723857300599e-06, "loss": 1.4362, "step": 6132 }, { "epoch": 0.8785274316000573, "grad_norm": 1.0515469312667847, "learning_rate": 7.640925946429323e-06, "loss": 1.49, "step": 6133 }, { "epoch": 0.8786706775533591, "grad_norm": 1.2573654651641846, "learning_rate": 7.623147918181139e-06, "loss": 1.2438, "step": 6134 }, { "epoch": 0.878813923506661, "grad_norm": 1.1230283975601196, "learning_rate": 7.605389776383276e-06, "loss": 1.5643, "step": 6135 }, { "epoch": 0.8789571694599627, "grad_norm": 1.0806825160980225, "learning_rate": 7.587651524858564e-06, "loss": 1.4948, "step": 6136 }, { "epoch": 0.8791004154132646, "grad_norm": 1.056382417678833, "learning_rate": 7.569933167425625e-06, "loss": 1.4369, "step": 6137 }, { "epoch": 0.8792436613665664, "grad_norm": 1.0525827407836914, "learning_rate": 7.552234707898787e-06, "loss": 1.2996, "step": 6138 }, { "epoch": 0.8793869073198682, "grad_norm": 1.069077968597412, "learning_rate": 7.53455615008809e-06, "loss": 1.4151, "step": 6139 }, { "epoch": 0.8795301532731701, "grad_norm": 1.147133469581604, "learning_rate": 7.516897497799302e-06, "loss": 1.3841, "step": 6140 }, { "epoch": 0.8796733992264718, "grad_norm": 0.9963047504425049, "learning_rate": 7.4992587548338485e-06, "loss": 1.3876, "step": 6141 }, { "epoch": 0.8798166451797736, "grad_norm": 1.0968685150146484, "learning_rate": 7.48163992498897e-06, "loss": 1.4206, "step": 6142 }, { "epoch": 0.8799598911330755, "grad_norm": 1.0552432537078857, "learning_rate": 7.464041012057554e-06, "loss": 1.3697, "step": 6143 }, { "epoch": 0.8801031370863773, "grad_norm": 1.0118249654769897, "learning_rate": 7.446462019828182e-06, "loss": 1.2755, "step": 6144 }, { "epoch": 0.8802463830396792, "grad_norm": 1.1240556240081787, "learning_rate": 7.428902952085171e-06, "loss": 1.2693, "step": 6145 }, { "epoch": 0.8803896289929809, "grad_norm": 1.1441346406936646, "learning_rate": 7.411363812608618e-06, "loss": 1.3218, "step": 6146 }, { "epoch": 0.8805328749462827, "grad_norm": 1.3975311517715454, "learning_rate": 7.393844605174205e-06, "loss": 1.2018, "step": 6147 }, { "epoch": 0.8806761208995846, "grad_norm": 0.9469305276870728, "learning_rate": 7.376345333553403e-06, "loss": 1.365, "step": 6148 }, { "epoch": 0.8808193668528864, "grad_norm": 1.0788648128509521, "learning_rate": 7.358866001513364e-06, "loss": 1.3918, "step": 6149 }, { "epoch": 0.8809626128061883, "grad_norm": 1.1089503765106201, "learning_rate": 7.3414066128169705e-06, "loss": 1.4112, "step": 6150 }, { "epoch": 0.88110585875949, "grad_norm": 1.0184285640716553, "learning_rate": 7.323967171222801e-06, "loss": 1.3326, "step": 6151 }, { "epoch": 0.8812491047127918, "grad_norm": 1.087239146232605, "learning_rate": 7.306547680485088e-06, "loss": 1.4886, "step": 6152 }, { "epoch": 0.8813923506660937, "grad_norm": 1.04493248462677, "learning_rate": 7.289148144353875e-06, "loss": 1.3661, "step": 6153 }, { "epoch": 0.8815355966193955, "grad_norm": 0.9697790741920471, "learning_rate": 7.271768566574843e-06, "loss": 1.316, "step": 6154 }, { "epoch": 0.8816788425726974, "grad_norm": 1.4127305746078491, "learning_rate": 7.2544089508893575e-06, "loss": 1.3447, "step": 6155 }, { "epoch": 0.8818220885259992, "grad_norm": 1.0601903200149536, "learning_rate": 7.237069301034516e-06, "loss": 1.4913, "step": 6156 }, { "epoch": 0.8819653344793009, "grad_norm": 1.2036266326904297, "learning_rate": 7.219749620743144e-06, "loss": 1.2353, "step": 6157 }, { "epoch": 0.8821085804326028, "grad_norm": 1.3706302642822266, "learning_rate": 7.202449913743714e-06, "loss": 1.4462, "step": 6158 }, { "epoch": 0.8822518263859046, "grad_norm": 1.033921241760254, "learning_rate": 7.185170183760437e-06, "loss": 1.3452, "step": 6159 }, { "epoch": 0.8823950723392064, "grad_norm": 0.9142951965332031, "learning_rate": 7.167910434513214e-06, "loss": 1.3113, "step": 6160 }, { "epoch": 0.8825383182925083, "grad_norm": 1.0199739933013916, "learning_rate": 7.15067066971764e-06, "loss": 1.5048, "step": 6161 }, { "epoch": 0.88268156424581, "grad_norm": 1.2085869312286377, "learning_rate": 7.133450893085003e-06, "loss": 1.476, "step": 6162 }, { "epoch": 0.8828248101991119, "grad_norm": 1.1762008666992188, "learning_rate": 7.116251108322281e-06, "loss": 1.434, "step": 6163 }, { "epoch": 0.8829680561524137, "grad_norm": 1.0077625513076782, "learning_rate": 7.099071319132211e-06, "loss": 1.4332, "step": 6164 }, { "epoch": 0.8831113021057155, "grad_norm": 1.0314888954162598, "learning_rate": 7.081911529213126e-06, "loss": 1.2855, "step": 6165 }, { "epoch": 0.8832545480590174, "grad_norm": 1.039973497390747, "learning_rate": 7.064771742259124e-06, "loss": 1.5249, "step": 6166 }, { "epoch": 0.8833977940123191, "grad_norm": 1.0815176963806152, "learning_rate": 7.047651961959978e-06, "loss": 1.4006, "step": 6167 }, { "epoch": 0.883541039965621, "grad_norm": 1.2100239992141724, "learning_rate": 7.03055219200115e-06, "loss": 1.3623, "step": 6168 }, { "epoch": 0.8836842859189228, "grad_norm": 1.2314960956573486, "learning_rate": 7.013472436063817e-06, "loss": 1.271, "step": 6169 }, { "epoch": 0.8838275318722246, "grad_norm": 1.0538991689682007, "learning_rate": 6.996412697824772e-06, "loss": 1.3367, "step": 6170 }, { "epoch": 0.8839707778255265, "grad_norm": 1.0745586156845093, "learning_rate": 6.979372980956611e-06, "loss": 1.3359, "step": 6171 }, { "epoch": 0.8841140237788282, "grad_norm": 1.1423468589782715, "learning_rate": 6.962353289127555e-06, "loss": 1.2749, "step": 6172 }, { "epoch": 0.88425726973213, "grad_norm": 1.3436617851257324, "learning_rate": 6.945353626001494e-06, "loss": 1.3652, "step": 6173 }, { "epoch": 0.8844005156854319, "grad_norm": 1.0537688732147217, "learning_rate": 6.928373995238047e-06, "loss": 1.4774, "step": 6174 }, { "epoch": 0.8845437616387337, "grad_norm": 0.9605186581611633, "learning_rate": 6.911414400492544e-06, "loss": 1.4939, "step": 6175 }, { "epoch": 0.8846870075920356, "grad_norm": 1.0892926454544067, "learning_rate": 6.89447484541591e-06, "loss": 1.5917, "step": 6176 }, { "epoch": 0.8848302535453374, "grad_norm": 1.176377534866333, "learning_rate": 6.87755533365485e-06, "loss": 1.5501, "step": 6177 }, { "epoch": 0.8849734994986391, "grad_norm": 1.0803096294403076, "learning_rate": 6.860655868851696e-06, "loss": 1.4181, "step": 6178 }, { "epoch": 0.885116745451941, "grad_norm": 1.1753509044647217, "learning_rate": 6.84377645464449e-06, "loss": 1.333, "step": 6179 }, { "epoch": 0.8852599914052428, "grad_norm": 1.0639952421188354, "learning_rate": 6.826917094666973e-06, "loss": 1.3638, "step": 6180 }, { "epoch": 0.8854032373585446, "grad_norm": 1.0984638929367065, "learning_rate": 6.810077792548508e-06, "loss": 1.3114, "step": 6181 }, { "epoch": 0.8855464833118465, "grad_norm": 1.081886649131775, "learning_rate": 6.793258551914206e-06, "loss": 1.4676, "step": 6182 }, { "epoch": 0.8856897292651482, "grad_norm": 1.2206023931503296, "learning_rate": 6.776459376384847e-06, "loss": 1.3366, "step": 6183 }, { "epoch": 0.8858329752184501, "grad_norm": 1.1014976501464844, "learning_rate": 6.759680269576852e-06, "loss": 1.5732, "step": 6184 }, { "epoch": 0.8859762211717519, "grad_norm": 1.1496285200119019, "learning_rate": 6.74292123510234e-06, "loss": 1.3276, "step": 6185 }, { "epoch": 0.8861194671250537, "grad_norm": 1.2254159450531006, "learning_rate": 6.726182276569148e-06, "loss": 1.386, "step": 6186 }, { "epoch": 0.8862627130783556, "grad_norm": 1.1976909637451172, "learning_rate": 6.709463397580728e-06, "loss": 1.3907, "step": 6187 }, { "epoch": 0.8864059590316573, "grad_norm": 1.1922142505645752, "learning_rate": 6.692764601736268e-06, "loss": 1.3916, "step": 6188 }, { "epoch": 0.8865492049849591, "grad_norm": 0.9329336285591125, "learning_rate": 6.67608589263059e-06, "loss": 1.4402, "step": 6189 }, { "epoch": 0.886692450938261, "grad_norm": 1.071075677871704, "learning_rate": 6.659427273854224e-06, "loss": 1.4704, "step": 6190 }, { "epoch": 0.8868356968915628, "grad_norm": 0.9965466260910034, "learning_rate": 6.642788748993323e-06, "loss": 1.3147, "step": 6191 }, { "epoch": 0.8869789428448647, "grad_norm": 0.9360291957855225, "learning_rate": 6.626170321629776e-06, "loss": 1.5308, "step": 6192 }, { "epoch": 0.8871221887981664, "grad_norm": 1.0450764894485474, "learning_rate": 6.609571995341135e-06, "loss": 1.3125, "step": 6193 }, { "epoch": 0.8872654347514682, "grad_norm": 1.097119688987732, "learning_rate": 6.592993773700573e-06, "loss": 1.4344, "step": 6194 }, { "epoch": 0.8874086807047701, "grad_norm": 1.11098051071167, "learning_rate": 6.576435660276969e-06, "loss": 1.3367, "step": 6195 }, { "epoch": 0.8875519266580719, "grad_norm": 1.2049585580825806, "learning_rate": 6.559897658634928e-06, "loss": 1.3774, "step": 6196 }, { "epoch": 0.8876951726113738, "grad_norm": 0.9086595177650452, "learning_rate": 6.5433797723346235e-06, "loss": 1.469, "step": 6197 }, { "epoch": 0.8878384185646756, "grad_norm": 1.185227632522583, "learning_rate": 6.526882004931967e-06, "loss": 1.3401, "step": 6198 }, { "epoch": 0.8879816645179773, "grad_norm": 1.064042329788208, "learning_rate": 6.510404359978506e-06, "loss": 1.3726, "step": 6199 }, { "epoch": 0.8881249104712792, "grad_norm": 1.0553909540176392, "learning_rate": 6.493946841021481e-06, "loss": 1.4104, "step": 6200 }, { "epoch": 0.888268156424581, "grad_norm": 0.935221254825592, "learning_rate": 6.477509451603791e-06, "loss": 1.4899, "step": 6201 }, { "epoch": 0.8884114023778829, "grad_norm": 0.9904655814170837, "learning_rate": 6.461092195263973e-06, "loss": 1.5958, "step": 6202 }, { "epoch": 0.8885546483311847, "grad_norm": 1.0689826011657715, "learning_rate": 6.444695075536289e-06, "loss": 1.3991, "step": 6203 }, { "epoch": 0.8886978942844864, "grad_norm": 1.0686777830123901, "learning_rate": 6.428318095950647e-06, "loss": 1.4976, "step": 6204 }, { "epoch": 0.8888411402377883, "grad_norm": 0.9992170929908752, "learning_rate": 6.411961260032551e-06, "loss": 1.4536, "step": 6205 }, { "epoch": 0.8889843861910901, "grad_norm": 0.9994930028915405, "learning_rate": 6.39562457130326e-06, "loss": 1.4394, "step": 6206 }, { "epoch": 0.8891276321443919, "grad_norm": 1.087852120399475, "learning_rate": 6.379308033279641e-06, "loss": 1.4414, "step": 6207 }, { "epoch": 0.8892708780976938, "grad_norm": 1.007928729057312, "learning_rate": 6.363011649474249e-06, "loss": 1.426, "step": 6208 }, { "epoch": 0.8894141240509955, "grad_norm": 1.0900826454162598, "learning_rate": 6.3467354233953e-06, "loss": 1.2698, "step": 6209 }, { "epoch": 0.8895573700042974, "grad_norm": 1.2544763088226318, "learning_rate": 6.3304793585466595e-06, "loss": 1.3357, "step": 6210 }, { "epoch": 0.8897006159575992, "grad_norm": 1.0173091888427734, "learning_rate": 6.3142434584278596e-06, "loss": 1.3894, "step": 6211 }, { "epoch": 0.889843861910901, "grad_norm": 1.051266074180603, "learning_rate": 6.2980277265340726e-06, "loss": 1.3084, "step": 6212 }, { "epoch": 0.8899871078642029, "grad_norm": 1.135063886642456, "learning_rate": 6.281832166356127e-06, "loss": 1.4347, "step": 6213 }, { "epoch": 0.8901303538175046, "grad_norm": 1.0632984638214111, "learning_rate": 6.265656781380591e-06, "loss": 1.3738, "step": 6214 }, { "epoch": 0.8902735997708064, "grad_norm": 0.927520751953125, "learning_rate": 6.249501575089567e-06, "loss": 1.3379, "step": 6215 }, { "epoch": 0.8904168457241083, "grad_norm": 1.1920615434646606, "learning_rate": 6.233366550960884e-06, "loss": 1.2079, "step": 6216 }, { "epoch": 0.8905600916774101, "grad_norm": 1.1692345142364502, "learning_rate": 6.2172517124680305e-06, "loss": 1.4754, "step": 6217 }, { "epoch": 0.890703337630712, "grad_norm": 1.0433944463729858, "learning_rate": 6.20115706308011e-06, "loss": 1.352, "step": 6218 }, { "epoch": 0.8908465835840138, "grad_norm": 0.9226456880569458, "learning_rate": 6.185082606261927e-06, "loss": 1.4443, "step": 6219 }, { "epoch": 0.8909898295373155, "grad_norm": 1.0460840463638306, "learning_rate": 6.169028345473881e-06, "loss": 1.446, "step": 6220 }, { "epoch": 0.8911330754906174, "grad_norm": 1.1735236644744873, "learning_rate": 6.152994284172076e-06, "loss": 1.2339, "step": 6221 }, { "epoch": 0.8912763214439192, "grad_norm": 1.2027605772018433, "learning_rate": 6.136980425808281e-06, "loss": 1.3602, "step": 6222 }, { "epoch": 0.8914195673972211, "grad_norm": 1.135356068611145, "learning_rate": 6.1209867738298286e-06, "loss": 1.3209, "step": 6223 }, { "epoch": 0.8915628133505229, "grad_norm": 1.140559434890747, "learning_rate": 6.105013331679754e-06, "loss": 1.4739, "step": 6224 }, { "epoch": 0.8917060593038246, "grad_norm": 1.170394778251648, "learning_rate": 6.089060102796807e-06, "loss": 1.3255, "step": 6225 }, { "epoch": 0.8918493052571265, "grad_norm": 1.0642247200012207, "learning_rate": 6.073127090615271e-06, "loss": 1.3135, "step": 6226 }, { "epoch": 0.8919925512104283, "grad_norm": 1.0005143880844116, "learning_rate": 6.057214298565151e-06, "loss": 1.4081, "step": 6227 }, { "epoch": 0.8921357971637301, "grad_norm": 1.0217949151992798, "learning_rate": 6.041321730072058e-06, "loss": 1.3107, "step": 6228 }, { "epoch": 0.892279043117032, "grad_norm": 1.41926908493042, "learning_rate": 6.025449388557281e-06, "loss": 1.3773, "step": 6229 }, { "epoch": 0.8924222890703337, "grad_norm": 1.2625526189804077, "learning_rate": 6.009597277437762e-06, "loss": 1.4789, "step": 6230 }, { "epoch": 0.8925655350236356, "grad_norm": 1.2700263261795044, "learning_rate": 5.9937654001260164e-06, "loss": 1.2987, "step": 6231 }, { "epoch": 0.8927087809769374, "grad_norm": 1.1416374444961548, "learning_rate": 5.977953760030297e-06, "loss": 1.2169, "step": 6232 }, { "epoch": 0.8928520269302392, "grad_norm": 1.0866599082946777, "learning_rate": 5.962162360554469e-06, "loss": 1.2683, "step": 6233 }, { "epoch": 0.8929952728835411, "grad_norm": 1.0845009088516235, "learning_rate": 5.9463912050979895e-06, "loss": 1.503, "step": 6234 }, { "epoch": 0.8931385188368428, "grad_norm": 1.125756025314331, "learning_rate": 5.930640297056022e-06, "loss": 1.4683, "step": 6235 }, { "epoch": 0.8932817647901447, "grad_norm": 1.233464241027832, "learning_rate": 5.914909639819344e-06, "loss": 1.3926, "step": 6236 }, { "epoch": 0.8934250107434465, "grad_norm": 1.1848875284194946, "learning_rate": 5.899199236774377e-06, "loss": 1.4587, "step": 6237 }, { "epoch": 0.8935682566967483, "grad_norm": 1.0152925252914429, "learning_rate": 5.883509091303174e-06, "loss": 1.4759, "step": 6238 }, { "epoch": 0.8937115026500502, "grad_norm": 0.9698759317398071, "learning_rate": 5.867839206783454e-06, "loss": 1.4933, "step": 6239 }, { "epoch": 0.8938547486033519, "grad_norm": 1.1864376068115234, "learning_rate": 5.852189586588552e-06, "loss": 1.2575, "step": 6240 }, { "epoch": 0.8939979945566537, "grad_norm": 1.143431544303894, "learning_rate": 5.836560234087418e-06, "loss": 1.441, "step": 6241 }, { "epoch": 0.8941412405099556, "grad_norm": 1.1181886196136475, "learning_rate": 5.820951152644671e-06, "loss": 1.2594, "step": 6242 }, { "epoch": 0.8942844864632574, "grad_norm": 1.0237746238708496, "learning_rate": 5.805362345620602e-06, "loss": 1.2846, "step": 6243 }, { "epoch": 0.8944277324165593, "grad_norm": 1.157672643661499, "learning_rate": 5.7897938163710365e-06, "loss": 1.2023, "step": 6244 }, { "epoch": 0.8945709783698611, "grad_norm": 0.9851744771003723, "learning_rate": 5.7742455682475384e-06, "loss": 1.1994, "step": 6245 }, { "epoch": 0.8947142243231628, "grad_norm": 1.0079066753387451, "learning_rate": 5.758717604597242e-06, "loss": 1.5645, "step": 6246 }, { "epoch": 0.8948574702764647, "grad_norm": 1.1544268131256104, "learning_rate": 5.743209928762927e-06, "loss": 1.3554, "step": 6247 }, { "epoch": 0.8950007162297665, "grad_norm": 1.0780153274536133, "learning_rate": 5.727722544083036e-06, "loss": 1.3706, "step": 6248 }, { "epoch": 0.8951439621830684, "grad_norm": 1.1990412473678589, "learning_rate": 5.71225545389158e-06, "loss": 1.2863, "step": 6249 }, { "epoch": 0.8952872081363702, "grad_norm": 1.0910295248031616, "learning_rate": 5.696808661518283e-06, "loss": 1.4708, "step": 6250 }, { "epoch": 0.8954304540896719, "grad_norm": 0.9389721155166626, "learning_rate": 5.681382170288452e-06, "loss": 1.3056, "step": 6251 }, { "epoch": 0.8955737000429738, "grad_norm": 1.16049063205719, "learning_rate": 5.665975983522997e-06, "loss": 1.4097, "step": 6252 }, { "epoch": 0.8957169459962756, "grad_norm": 1.180406928062439, "learning_rate": 5.6505901045385e-06, "loss": 1.4388, "step": 6253 }, { "epoch": 0.8958601919495774, "grad_norm": 1.3661245107650757, "learning_rate": 5.635224536647188e-06, "loss": 1.5029, "step": 6254 }, { "epoch": 0.8960034379028793, "grad_norm": 1.1520068645477295, "learning_rate": 5.619879283156859e-06, "loss": 1.2789, "step": 6255 }, { "epoch": 0.896146683856181, "grad_norm": 1.054107904434204, "learning_rate": 5.604554347370983e-06, "loss": 1.3756, "step": 6256 }, { "epoch": 0.8962899298094829, "grad_norm": 1.1637687683105469, "learning_rate": 5.5892497325886195e-06, "loss": 1.4209, "step": 6257 }, { "epoch": 0.8964331757627847, "grad_norm": 1.2004553079605103, "learning_rate": 5.573965442104489e-06, "loss": 1.4547, "step": 6258 }, { "epoch": 0.8965764217160865, "grad_norm": 1.2279695272445679, "learning_rate": 5.558701479208916e-06, "loss": 1.3911, "step": 6259 }, { "epoch": 0.8967196676693884, "grad_norm": 1.06075918674469, "learning_rate": 5.5434578471878605e-06, "loss": 1.4286, "step": 6260 }, { "epoch": 0.8968629136226901, "grad_norm": 1.3761261701583862, "learning_rate": 5.528234549322908e-06, "loss": 1.4348, "step": 6261 }, { "epoch": 0.897006159575992, "grad_norm": 1.0343631505966187, "learning_rate": 5.513031588891226e-06, "loss": 1.2258, "step": 6262 }, { "epoch": 0.8971494055292938, "grad_norm": 1.0171232223510742, "learning_rate": 5.497848969165631e-06, "loss": 1.5321, "step": 6263 }, { "epoch": 0.8972926514825956, "grad_norm": 1.214233636856079, "learning_rate": 5.482686693414629e-06, "loss": 1.266, "step": 6264 }, { "epoch": 0.8974358974358975, "grad_norm": 1.2685524225234985, "learning_rate": 5.467544764902221e-06, "loss": 1.3513, "step": 6265 }, { "epoch": 0.8975791433891993, "grad_norm": 1.0766239166259766, "learning_rate": 5.452423186888111e-06, "loss": 1.3664, "step": 6266 }, { "epoch": 0.897722389342501, "grad_norm": 1.140155553817749, "learning_rate": 5.437321962627606e-06, "loss": 1.2976, "step": 6267 }, { "epoch": 0.8978656352958029, "grad_norm": 1.0092066526412964, "learning_rate": 5.422241095371605e-06, "loss": 1.2941, "step": 6268 }, { "epoch": 0.8980088812491047, "grad_norm": 1.015629768371582, "learning_rate": 5.407180588366689e-06, "loss": 1.4672, "step": 6269 }, { "epoch": 0.8981521272024066, "grad_norm": 1.1754333972930908, "learning_rate": 5.392140444854954e-06, "loss": 1.4973, "step": 6270 }, { "epoch": 0.8982953731557084, "grad_norm": 1.0000135898590088, "learning_rate": 5.377120668074209e-06, "loss": 1.3792, "step": 6271 }, { "epoch": 0.8984386191090101, "grad_norm": 1.2560063600540161, "learning_rate": 5.362121261257847e-06, "loss": 1.5642, "step": 6272 }, { "epoch": 0.898581865062312, "grad_norm": 1.1916520595550537, "learning_rate": 5.3471422276348385e-06, "loss": 1.4755, "step": 6273 }, { "epoch": 0.8987251110156138, "grad_norm": 1.1774929761886597, "learning_rate": 5.332183570429794e-06, "loss": 1.3062, "step": 6274 }, { "epoch": 0.8988683569689156, "grad_norm": 1.1466996669769287, "learning_rate": 5.317245292862994e-06, "loss": 1.5093, "step": 6275 }, { "epoch": 0.8990116029222175, "grad_norm": 1.0106556415557861, "learning_rate": 5.302327398150242e-06, "loss": 1.4178, "step": 6276 }, { "epoch": 0.8991548488755192, "grad_norm": 1.1131858825683594, "learning_rate": 5.287429889502993e-06, "loss": 1.5591, "step": 6277 }, { "epoch": 0.8992980948288211, "grad_norm": 1.1433442831039429, "learning_rate": 5.272552770128314e-06, "loss": 1.3623, "step": 6278 }, { "epoch": 0.8994413407821229, "grad_norm": 1.1273397207260132, "learning_rate": 5.257696043228888e-06, "loss": 1.5303, "step": 6279 }, { "epoch": 0.8995845867354247, "grad_norm": 1.0660606622695923, "learning_rate": 5.242859712003001e-06, "loss": 1.333, "step": 6280 }, { "epoch": 0.8997278326887266, "grad_norm": 1.046756386756897, "learning_rate": 5.22804377964452e-06, "loss": 1.3777, "step": 6281 }, { "epoch": 0.8998710786420283, "grad_norm": 1.1891934871673584, "learning_rate": 5.2132482493429835e-06, "loss": 1.5024, "step": 6282 }, { "epoch": 0.9000143245953302, "grad_norm": 1.0168426036834717, "learning_rate": 5.198473124283509e-06, "loss": 1.5215, "step": 6283 }, { "epoch": 0.900157570548632, "grad_norm": 1.2852948904037476, "learning_rate": 5.183718407646787e-06, "loss": 1.3111, "step": 6284 }, { "epoch": 0.9003008165019338, "grad_norm": 1.2262461185455322, "learning_rate": 5.168984102609142e-06, "loss": 1.3744, "step": 6285 }, { "epoch": 0.9004440624552357, "grad_norm": 0.970107913017273, "learning_rate": 5.1542702123425264e-06, "loss": 1.5221, "step": 6286 }, { "epoch": 0.9005873084085375, "grad_norm": 0.9678454995155334, "learning_rate": 5.139576740014462e-06, "loss": 1.4439, "step": 6287 }, { "epoch": 0.9007305543618392, "grad_norm": 1.179451823234558, "learning_rate": 5.124903688788096e-06, "loss": 1.4788, "step": 6288 }, { "epoch": 0.9008738003151411, "grad_norm": 0.9689302444458008, "learning_rate": 5.1102510618221686e-06, "loss": 1.4789, "step": 6289 }, { "epoch": 0.9010170462684429, "grad_norm": 1.1082656383514404, "learning_rate": 5.0956188622710455e-06, "loss": 1.2672, "step": 6290 }, { "epoch": 0.9011602922217448, "grad_norm": 0.9578465819358826, "learning_rate": 5.081007093284651e-06, "loss": 1.3596, "step": 6291 }, { "epoch": 0.9013035381750466, "grad_norm": 0.9995458722114563, "learning_rate": 5.066415758008536e-06, "loss": 1.4438, "step": 6292 }, { "epoch": 0.9014467841283483, "grad_norm": 1.2014294862747192, "learning_rate": 5.051844859583888e-06, "loss": 1.3551, "step": 6293 }, { "epoch": 0.9015900300816502, "grad_norm": 1.06302011013031, "learning_rate": 5.03729440114743e-06, "loss": 1.217, "step": 6294 }, { "epoch": 0.901733276034952, "grad_norm": 1.2783023118972778, "learning_rate": 5.022764385831524e-06, "loss": 1.4079, "step": 6295 }, { "epoch": 0.9018765219882539, "grad_norm": 1.0324749946594238, "learning_rate": 5.008254816764124e-06, "loss": 1.5073, "step": 6296 }, { "epoch": 0.9020197679415557, "grad_norm": 1.0042519569396973, "learning_rate": 4.993765697068787e-06, "loss": 1.5058, "step": 6297 }, { "epoch": 0.9021630138948574, "grad_norm": 1.0395405292510986, "learning_rate": 4.979297029864672e-06, "loss": 1.4014, "step": 6298 }, { "epoch": 0.9023062598481593, "grad_norm": 0.962011456489563, "learning_rate": 4.964848818266466e-06, "loss": 1.3273, "step": 6299 }, { "epoch": 0.9024495058014611, "grad_norm": 1.288873314857483, "learning_rate": 4.950421065384581e-06, "loss": 1.2289, "step": 6300 }, { "epoch": 0.902592751754763, "grad_norm": 1.252167820930481, "learning_rate": 4.936013774324944e-06, "loss": 1.4425, "step": 6301 }, { "epoch": 0.9027359977080648, "grad_norm": 1.2269313335418701, "learning_rate": 4.9216269481890505e-06, "loss": 1.4974, "step": 6302 }, { "epoch": 0.9028792436613665, "grad_norm": 1.2080711126327515, "learning_rate": 4.907260590074048e-06, "loss": 1.3452, "step": 6303 }, { "epoch": 0.9030224896146684, "grad_norm": 1.1367989778518677, "learning_rate": 4.892914703072671e-06, "loss": 1.3002, "step": 6304 }, { "epoch": 0.9031657355679702, "grad_norm": 1.2795456647872925, "learning_rate": 4.8785892902732275e-06, "loss": 1.2931, "step": 6305 }, { "epoch": 0.903308981521272, "grad_norm": 1.1423330307006836, "learning_rate": 4.864284354759607e-06, "loss": 1.352, "step": 6306 }, { "epoch": 0.9034522274745739, "grad_norm": 1.1512235403060913, "learning_rate": 4.849999899611324e-06, "loss": 1.244, "step": 6307 }, { "epoch": 0.9035954734278757, "grad_norm": 1.193757176399231, "learning_rate": 4.835735927903473e-06, "loss": 1.2428, "step": 6308 }, { "epoch": 0.9037387193811774, "grad_norm": 1.1099021434783936, "learning_rate": 4.821492442706732e-06, "loss": 1.4622, "step": 6309 }, { "epoch": 0.9038819653344793, "grad_norm": 0.9713549613952637, "learning_rate": 4.807269447087348e-06, "loss": 1.3404, "step": 6310 }, { "epoch": 0.9040252112877811, "grad_norm": 1.155503273010254, "learning_rate": 4.793066944107205e-06, "loss": 1.4044, "step": 6311 }, { "epoch": 0.904168457241083, "grad_norm": 1.0077558755874634, "learning_rate": 4.778884936823758e-06, "loss": 1.5516, "step": 6312 }, { "epoch": 0.9043117031943848, "grad_norm": 1.0336101055145264, "learning_rate": 4.764723428290019e-06, "loss": 1.3172, "step": 6313 }, { "epoch": 0.9044549491476865, "grad_norm": 1.1511133909225464, "learning_rate": 4.750582421554605e-06, "loss": 1.4852, "step": 6314 }, { "epoch": 0.9045981951009884, "grad_norm": 1.1120917797088623, "learning_rate": 4.7364619196617495e-06, "loss": 1.3315, "step": 6315 }, { "epoch": 0.9047414410542902, "grad_norm": 1.1380597352981567, "learning_rate": 4.722361925651231e-06, "loss": 1.4127, "step": 6316 }, { "epoch": 0.9048846870075921, "grad_norm": 0.8841583728790283, "learning_rate": 4.708282442558443e-06, "loss": 1.3319, "step": 6317 }, { "epoch": 0.9050279329608939, "grad_norm": 1.1729316711425781, "learning_rate": 4.694223473414328e-06, "loss": 1.2857, "step": 6318 }, { "epoch": 0.9051711789141956, "grad_norm": 0.9037086963653564, "learning_rate": 4.6801850212454755e-06, "loss": 1.3834, "step": 6319 }, { "epoch": 0.9053144248674975, "grad_norm": 1.21295166015625, "learning_rate": 4.6661670890739475e-06, "loss": 1.4177, "step": 6320 }, { "epoch": 0.9054576708207993, "grad_norm": 1.466363549232483, "learning_rate": 4.652169679917517e-06, "loss": 1.4996, "step": 6321 }, { "epoch": 0.9056009167741012, "grad_norm": 1.1179686784744263, "learning_rate": 4.638192796789487e-06, "loss": 1.594, "step": 6322 }, { "epoch": 0.905744162727403, "grad_norm": 1.0876028537750244, "learning_rate": 4.6242364426986815e-06, "loss": 1.4474, "step": 6323 }, { "epoch": 0.9058874086807047, "grad_norm": 1.1088327169418335, "learning_rate": 4.610300620649577e-06, "loss": 1.1431, "step": 6324 }, { "epoch": 0.9060306546340066, "grad_norm": 1.0077004432678223, "learning_rate": 4.59638533364225e-06, "loss": 1.4191, "step": 6325 }, { "epoch": 0.9061739005873084, "grad_norm": 1.439351201057434, "learning_rate": 4.582490584672272e-06, "loss": 1.3493, "step": 6326 }, { "epoch": 0.9063171465406102, "grad_norm": 1.2281850576400757, "learning_rate": 4.5686163767308606e-06, "loss": 1.4246, "step": 6327 }, { "epoch": 0.9064603924939121, "grad_norm": 1.1451550722122192, "learning_rate": 4.554762712804772e-06, "loss": 1.3007, "step": 6328 }, { "epoch": 0.9066036384472138, "grad_norm": 1.092812180519104, "learning_rate": 4.540929595876376e-06, "loss": 1.4011, "step": 6329 }, { "epoch": 0.9067468844005157, "grad_norm": 0.9452556371688843, "learning_rate": 4.527117028923599e-06, "loss": 1.4344, "step": 6330 }, { "epoch": 0.9068901303538175, "grad_norm": 0.9995648264884949, "learning_rate": 4.513325014919923e-06, "loss": 1.5025, "step": 6331 }, { "epoch": 0.9070333763071193, "grad_norm": 1.0484440326690674, "learning_rate": 4.499553556834446e-06, "loss": 1.3382, "step": 6332 }, { "epoch": 0.9071766222604212, "grad_norm": 1.012657642364502, "learning_rate": 4.4858026576318435e-06, "loss": 1.3602, "step": 6333 }, { "epoch": 0.907319868213723, "grad_norm": 1.147506594657898, "learning_rate": 4.472072320272292e-06, "loss": 1.4667, "step": 6334 }, { "epoch": 0.9074631141670247, "grad_norm": 1.0223970413208008, "learning_rate": 4.4583625477116256e-06, "loss": 1.4221, "step": 6335 }, { "epoch": 0.9076063601203266, "grad_norm": 1.0855114459991455, "learning_rate": 4.444673342901218e-06, "loss": 1.2877, "step": 6336 }, { "epoch": 0.9077496060736284, "grad_norm": 1.2820749282836914, "learning_rate": 4.431004708788011e-06, "loss": 1.2337, "step": 6337 }, { "epoch": 0.9078928520269303, "grad_norm": 1.1012824773788452, "learning_rate": 4.41735664831453e-06, "loss": 1.3096, "step": 6338 }, { "epoch": 0.9080360979802321, "grad_norm": 1.0830519199371338, "learning_rate": 4.403729164418857e-06, "loss": 1.4016, "step": 6339 }, { "epoch": 0.9081793439335338, "grad_norm": 1.1054964065551758, "learning_rate": 4.390122260034657e-06, "loss": 1.427, "step": 6340 }, { "epoch": 0.9083225898868357, "grad_norm": 1.2707315683364868, "learning_rate": 4.376535938091153e-06, "loss": 1.1683, "step": 6341 }, { "epoch": 0.9084658358401375, "grad_norm": 1.4266351461410522, "learning_rate": 4.362970201513139e-06, "loss": 1.3057, "step": 6342 }, { "epoch": 0.9086090817934394, "grad_norm": 1.1522914171218872, "learning_rate": 4.3494250532210124e-06, "loss": 1.6687, "step": 6343 }, { "epoch": 0.9087523277467412, "grad_norm": 1.1509004831314087, "learning_rate": 4.335900496130674e-06, "loss": 1.4543, "step": 6344 }, { "epoch": 0.9088955737000429, "grad_norm": 1.037503719329834, "learning_rate": 4.3223965331536386e-06, "loss": 1.3607, "step": 6345 }, { "epoch": 0.9090388196533448, "grad_norm": 1.185473084449768, "learning_rate": 4.308913167196982e-06, "loss": 1.2868, "step": 6346 }, { "epoch": 0.9091820656066466, "grad_norm": 1.037713646888733, "learning_rate": 4.2954504011633255e-06, "loss": 1.4476, "step": 6347 }, { "epoch": 0.9093253115599484, "grad_norm": 1.029164433479309, "learning_rate": 4.282008237950896e-06, "loss": 1.3455, "step": 6348 }, { "epoch": 0.9094685575132503, "grad_norm": 1.2151432037353516, "learning_rate": 4.2685866804534236e-06, "loss": 1.315, "step": 6349 }, { "epoch": 0.909611803466552, "grad_norm": 0.9240227341651917, "learning_rate": 4.255185731560252e-06, "loss": 1.4948, "step": 6350 }, { "epoch": 0.9097550494198539, "grad_norm": 1.0023682117462158, "learning_rate": 4.241805394156295e-06, "loss": 1.5267, "step": 6351 }, { "epoch": 0.9098982953731557, "grad_norm": 1.3643170595169067, "learning_rate": 4.228445671121972e-06, "loss": 1.5083, "step": 6352 }, { "epoch": 0.9100415413264575, "grad_norm": 0.9547325372695923, "learning_rate": 4.215106565333316e-06, "loss": 1.4505, "step": 6353 }, { "epoch": 0.9101847872797594, "grad_norm": 0.9599502086639404, "learning_rate": 4.201788079661928e-06, "loss": 1.4625, "step": 6354 }, { "epoch": 0.9103280332330612, "grad_norm": 1.4839260578155518, "learning_rate": 4.188490216974916e-06, "loss": 1.4722, "step": 6355 }, { "epoch": 0.910471279186363, "grad_norm": 0.9959442019462585, "learning_rate": 4.17521298013499e-06, "loss": 1.4661, "step": 6356 }, { "epoch": 0.9106145251396648, "grad_norm": 1.2105557918548584, "learning_rate": 4.161956372000419e-06, "loss": 1.4673, "step": 6357 }, { "epoch": 0.9107577710929666, "grad_norm": 1.1694109439849854, "learning_rate": 4.14872039542501e-06, "loss": 1.3651, "step": 6358 }, { "epoch": 0.9109010170462685, "grad_norm": 0.937365710735321, "learning_rate": 4.135505053258171e-06, "loss": 1.4675, "step": 6359 }, { "epoch": 0.9110442629995703, "grad_norm": 0.9310875535011292, "learning_rate": 4.122310348344782e-06, "loss": 1.3986, "step": 6360 }, { "epoch": 0.911187508952872, "grad_norm": 0.9632576107978821, "learning_rate": 4.109136283525394e-06, "loss": 1.5452, "step": 6361 }, { "epoch": 0.9113307549061739, "grad_norm": 1.23027765750885, "learning_rate": 4.0959828616360385e-06, "loss": 1.1532, "step": 6362 }, { "epoch": 0.9114740008594757, "grad_norm": 1.1082545518875122, "learning_rate": 4.082850085508305e-06, "loss": 1.326, "step": 6363 }, { "epoch": 0.9116172468127776, "grad_norm": 1.0489147901535034, "learning_rate": 4.069737957969366e-06, "loss": 1.3453, "step": 6364 }, { "epoch": 0.9117604927660794, "grad_norm": 1.1038568019866943, "learning_rate": 4.056646481841952e-06, "loss": 1.3133, "step": 6365 }, { "epoch": 0.9119037387193811, "grad_norm": 0.9686883091926575, "learning_rate": 4.04357565994431e-06, "loss": 1.4335, "step": 6366 }, { "epoch": 0.912046984672683, "grad_norm": 1.1946451663970947, "learning_rate": 4.030525495090276e-06, "loss": 1.3979, "step": 6367 }, { "epoch": 0.9121902306259848, "grad_norm": 1.1127737760543823, "learning_rate": 4.017495990089227e-06, "loss": 1.472, "step": 6368 }, { "epoch": 0.9123334765792867, "grad_norm": 0.9373816847801208, "learning_rate": 4.004487147746105e-06, "loss": 1.3799, "step": 6369 }, { "epoch": 0.9124767225325885, "grad_norm": 1.2301857471466064, "learning_rate": 3.991498970861373e-06, "loss": 1.3138, "step": 6370 }, { "epoch": 0.9126199684858902, "grad_norm": 1.12795889377594, "learning_rate": 3.9785314622310495e-06, "loss": 1.3839, "step": 6371 }, { "epoch": 0.9127632144391921, "grad_norm": 1.0495474338531494, "learning_rate": 3.965584624646768e-06, "loss": 1.2327, "step": 6372 }, { "epoch": 0.9129064603924939, "grad_norm": 1.1497222185134888, "learning_rate": 3.9526584608956196e-06, "loss": 1.449, "step": 6373 }, { "epoch": 0.9130497063457957, "grad_norm": 1.0091516971588135, "learning_rate": 3.939752973760313e-06, "loss": 1.3958, "step": 6374 }, { "epoch": 0.9131929522990976, "grad_norm": 1.1240473985671997, "learning_rate": 3.9268681660190595e-06, "loss": 1.3291, "step": 6375 }, { "epoch": 0.9133361982523994, "grad_norm": 1.1444019079208374, "learning_rate": 3.9140040404456515e-06, "loss": 1.3045, "step": 6376 }, { "epoch": 0.9134794442057012, "grad_norm": 1.0356318950653076, "learning_rate": 3.901160599809428e-06, "loss": 1.3282, "step": 6377 }, { "epoch": 0.913622690159003, "grad_norm": 1.0371900796890259, "learning_rate": 3.888337846875223e-06, "loss": 1.4139, "step": 6378 }, { "epoch": 0.9137659361123048, "grad_norm": 1.1123536825180054, "learning_rate": 3.875535784403495e-06, "loss": 1.2246, "step": 6379 }, { "epoch": 0.9139091820656067, "grad_norm": 1.2111045122146606, "learning_rate": 3.862754415150216e-06, "loss": 1.3707, "step": 6380 }, { "epoch": 0.9140524280189085, "grad_norm": 1.1193556785583496, "learning_rate": 3.849993741866864e-06, "loss": 1.4247, "step": 6381 }, { "epoch": 0.9141956739722102, "grad_norm": 1.1268606185913086, "learning_rate": 3.837253767300519e-06, "loss": 1.3626, "step": 6382 }, { "epoch": 0.9143389199255121, "grad_norm": 1.5920072793960571, "learning_rate": 3.824534494193799e-06, "loss": 1.4573, "step": 6383 }, { "epoch": 0.9144821658788139, "grad_norm": 1.0998446941375732, "learning_rate": 3.8118359252848236e-06, "loss": 1.249, "step": 6384 }, { "epoch": 0.9146254118321158, "grad_norm": 1.1941128969192505, "learning_rate": 3.799158063307273e-06, "loss": 1.3874, "step": 6385 }, { "epoch": 0.9147686577854176, "grad_norm": 0.946759045124054, "learning_rate": 3.786500910990398e-06, "loss": 1.4146, "step": 6386 }, { "epoch": 0.9149119037387193, "grad_norm": 1.0596836805343628, "learning_rate": 3.773864471058963e-06, "loss": 1.1985, "step": 6387 }, { "epoch": 0.9150551496920212, "grad_norm": 1.305836796760559, "learning_rate": 3.76124874623327e-06, "loss": 1.3179, "step": 6388 }, { "epoch": 0.915198395645323, "grad_norm": 1.0566449165344238, "learning_rate": 3.748653739229191e-06, "loss": 1.336, "step": 6389 }, { "epoch": 0.9153416415986249, "grad_norm": 1.0194088220596313, "learning_rate": 3.7360794527581234e-06, "loss": 1.4013, "step": 6390 }, { "epoch": 0.9154848875519267, "grad_norm": 0.9489338994026184, "learning_rate": 3.7235258895269686e-06, "loss": 1.1851, "step": 6391 }, { "epoch": 0.9156281335052284, "grad_norm": 1.189700722694397, "learning_rate": 3.7109930522382097e-06, "loss": 1.4238, "step": 6392 }, { "epoch": 0.9157713794585303, "grad_norm": 1.3931171894073486, "learning_rate": 3.698480943589888e-06, "loss": 1.4314, "step": 6393 }, { "epoch": 0.9159146254118321, "grad_norm": 1.0614889860153198, "learning_rate": 3.685989566275516e-06, "loss": 1.4658, "step": 6394 }, { "epoch": 0.916057871365134, "grad_norm": 0.9896694421768188, "learning_rate": 3.673518922984187e-06, "loss": 1.3688, "step": 6395 }, { "epoch": 0.9162011173184358, "grad_norm": 1.1887366771697998, "learning_rate": 3.66106901640052e-06, "loss": 1.3087, "step": 6396 }, { "epoch": 0.9163443632717376, "grad_norm": 1.233389973640442, "learning_rate": 3.6486398492046827e-06, "loss": 1.3968, "step": 6397 }, { "epoch": 0.9164876092250394, "grad_norm": 1.0355703830718994, "learning_rate": 3.636231424072367e-06, "loss": 1.3336, "step": 6398 }, { "epoch": 0.9166308551783412, "grad_norm": 1.1142059564590454, "learning_rate": 3.623843743674771e-06, "loss": 1.4192, "step": 6399 }, { "epoch": 0.916774101131643, "grad_norm": 1.3490045070648193, "learning_rate": 3.611476810678693e-06, "loss": 1.4034, "step": 6400 }, { "epoch": 0.9169173470849449, "grad_norm": 0.9730175733566284, "learning_rate": 3.5991306277464167e-06, "loss": 1.4531, "step": 6401 }, { "epoch": 0.9170605930382467, "grad_norm": 1.1233919858932495, "learning_rate": 3.5868051975357587e-06, "loss": 1.3041, "step": 6402 }, { "epoch": 0.9172038389915484, "grad_norm": 1.0261316299438477, "learning_rate": 3.5745005227000637e-06, "loss": 1.4572, "step": 6403 }, { "epoch": 0.9173470849448503, "grad_norm": 1.193352460861206, "learning_rate": 3.56221660588828e-06, "loss": 1.3143, "step": 6404 }, { "epoch": 0.9174903308981521, "grad_norm": 1.0419745445251465, "learning_rate": 3.5499534497447807e-06, "loss": 1.2528, "step": 6405 }, { "epoch": 0.917633576851454, "grad_norm": 1.0759903192520142, "learning_rate": 3.5377110569095316e-06, "loss": 1.3717, "step": 6406 }, { "epoch": 0.9177768228047558, "grad_norm": 1.2016997337341309, "learning_rate": 3.5254894300180252e-06, "loss": 1.4165, "step": 6407 }, { "epoch": 0.9179200687580575, "grad_norm": 1.1434471607208252, "learning_rate": 3.5132885717012675e-06, "loss": 1.2155, "step": 6408 }, { "epoch": 0.9180633147113594, "grad_norm": 1.4297231435775757, "learning_rate": 3.5011084845858246e-06, "loss": 1.2848, "step": 6409 }, { "epoch": 0.9182065606646612, "grad_norm": 1.0412758588790894, "learning_rate": 3.4889491712937205e-06, "loss": 1.319, "step": 6410 }, { "epoch": 0.9183498066179631, "grad_norm": 0.9930747151374817, "learning_rate": 3.4768106344425956e-06, "loss": 1.4622, "step": 6411 }, { "epoch": 0.9184930525712649, "grad_norm": 0.957931637763977, "learning_rate": 3.46469287664557e-06, "loss": 1.3093, "step": 6412 }, { "epoch": 0.9186362985245666, "grad_norm": 1.179731845855713, "learning_rate": 3.4525959005112794e-06, "loss": 1.4868, "step": 6413 }, { "epoch": 0.9187795444778685, "grad_norm": 0.889260470867157, "learning_rate": 3.440519708643919e-06, "loss": 1.3579, "step": 6414 }, { "epoch": 0.9189227904311703, "grad_norm": 1.1458030939102173, "learning_rate": 3.428464303643186e-06, "loss": 1.3403, "step": 6415 }, { "epoch": 0.9190660363844722, "grad_norm": 1.189430594444275, "learning_rate": 3.4164296881043055e-06, "loss": 1.5304, "step": 6416 }, { "epoch": 0.919209282337774, "grad_norm": 1.0229490995407104, "learning_rate": 3.4044158646180604e-06, "loss": 1.3845, "step": 6417 }, { "epoch": 0.9193525282910758, "grad_norm": 1.1848504543304443, "learning_rate": 3.3924228357706922e-06, "loss": 1.4941, "step": 6418 }, { "epoch": 0.9194957742443776, "grad_norm": 1.21671724319458, "learning_rate": 3.3804506041440363e-06, "loss": 1.5864, "step": 6419 }, { "epoch": 0.9196390201976794, "grad_norm": 1.372091293334961, "learning_rate": 3.368499172315398e-06, "loss": 1.36, "step": 6420 }, { "epoch": 0.9197822661509812, "grad_norm": 1.164242148399353, "learning_rate": 3.356568542857619e-06, "loss": 1.2125, "step": 6421 }, { "epoch": 0.9199255121042831, "grad_norm": 1.1633225679397583, "learning_rate": 3.3446587183390888e-06, "loss": 1.4637, "step": 6422 }, { "epoch": 0.9200687580575849, "grad_norm": 0.9969565868377686, "learning_rate": 3.3327697013236904e-06, "loss": 1.4894, "step": 6423 }, { "epoch": 0.9202120040108867, "grad_norm": 1.3349730968475342, "learning_rate": 3.3209014943708204e-06, "loss": 1.2023, "step": 6424 }, { "epoch": 0.9203552499641885, "grad_norm": 1.0232676267623901, "learning_rate": 3.3090541000354358e-06, "loss": 1.4571, "step": 6425 }, { "epoch": 0.9204984959174903, "grad_norm": 1.0784478187561035, "learning_rate": 3.2972275208679625e-06, "loss": 1.4151, "step": 6426 }, { "epoch": 0.9206417418707922, "grad_norm": 0.9066776633262634, "learning_rate": 3.2854217594143975e-06, "loss": 1.3459, "step": 6427 }, { "epoch": 0.920784987824094, "grad_norm": 1.1936663389205933, "learning_rate": 3.273636818216197e-06, "loss": 1.468, "step": 6428 }, { "epoch": 0.9209282337773957, "grad_norm": 1.1588611602783203, "learning_rate": 3.2618726998103867e-06, "loss": 1.6047, "step": 6429 }, { "epoch": 0.9210714797306976, "grad_norm": 1.0405627489089966, "learning_rate": 3.2501294067295073e-06, "loss": 1.6571, "step": 6430 }, { "epoch": 0.9212147256839994, "grad_norm": 1.163070797920227, "learning_rate": 3.2384069415015594e-06, "loss": 1.2721, "step": 6431 }, { "epoch": 0.9213579716373013, "grad_norm": 1.0822196006774902, "learning_rate": 3.226705306650113e-06, "loss": 1.3982, "step": 6432 }, { "epoch": 0.9215012175906031, "grad_norm": 1.1785391569137573, "learning_rate": 3.215024504694264e-06, "loss": 1.4575, "step": 6433 }, { "epoch": 0.9216444635439048, "grad_norm": 1.0128576755523682, "learning_rate": 3.203364538148579e-06, "loss": 1.3587, "step": 6434 }, { "epoch": 0.9217877094972067, "grad_norm": 0.9469732642173767, "learning_rate": 3.1917254095231606e-06, "loss": 1.3952, "step": 6435 }, { "epoch": 0.9219309554505085, "grad_norm": 1.11313796043396, "learning_rate": 3.1801071213236277e-06, "loss": 1.4033, "step": 6436 }, { "epoch": 0.9220742014038104, "grad_norm": 0.9706379771232605, "learning_rate": 3.1685096760511123e-06, "loss": 1.5194, "step": 6437 }, { "epoch": 0.9222174473571122, "grad_norm": 1.0394903421401978, "learning_rate": 3.156933076202262e-06, "loss": 1.2867, "step": 6438 }, { "epoch": 0.9223606933104139, "grad_norm": 1.1205493211746216, "learning_rate": 3.145377324269205e-06, "loss": 1.5495, "step": 6439 }, { "epoch": 0.9225039392637158, "grad_norm": 1.1452409029006958, "learning_rate": 3.1338424227396524e-06, "loss": 1.4917, "step": 6440 }, { "epoch": 0.9226471852170176, "grad_norm": 0.8819735646247864, "learning_rate": 3.122328374096761e-06, "loss": 1.3784, "step": 6441 }, { "epoch": 0.9227904311703194, "grad_norm": 1.0010957717895508, "learning_rate": 3.110835180819216e-06, "loss": 1.3729, "step": 6442 }, { "epoch": 0.9229336771236213, "grad_norm": 1.091059684753418, "learning_rate": 3.099362845381215e-06, "loss": 1.3659, "step": 6443 }, { "epoch": 0.9230769230769231, "grad_norm": 1.1242009401321411, "learning_rate": 3.0879113702524832e-06, "loss": 1.3277, "step": 6444 }, { "epoch": 0.9232201690302249, "grad_norm": 1.8432196378707886, "learning_rate": 3.0764807578982256e-06, "loss": 1.207, "step": 6445 }, { "epoch": 0.9233634149835267, "grad_norm": 1.1757209300994873, "learning_rate": 3.0650710107791748e-06, "loss": 1.4039, "step": 6446 }, { "epoch": 0.9235066609368285, "grad_norm": 0.9085772037506104, "learning_rate": 3.053682131351576e-06, "loss": 1.4077, "step": 6447 }, { "epoch": 0.9236499068901304, "grad_norm": 1.0273581743240356, "learning_rate": 3.042314122067169e-06, "loss": 1.3291, "step": 6448 }, { "epoch": 0.9237931528434322, "grad_norm": 1.4282138347625732, "learning_rate": 3.0309669853731848e-06, "loss": 1.363, "step": 6449 }, { "epoch": 0.923936398796734, "grad_norm": 1.030496597290039, "learning_rate": 3.0196407237124024e-06, "loss": 1.358, "step": 6450 }, { "epoch": 0.9240796447500358, "grad_norm": 1.0387153625488281, "learning_rate": 3.008335339523105e-06, "loss": 1.41, "step": 6451 }, { "epoch": 0.9242228907033376, "grad_norm": 1.1078928709030151, "learning_rate": 2.9970508352390125e-06, "loss": 1.3474, "step": 6452 }, { "epoch": 0.9243661366566395, "grad_norm": 1.038006067276001, "learning_rate": 2.985787213289415e-06, "loss": 1.3773, "step": 6453 }, { "epoch": 0.9245093826099413, "grad_norm": 0.962173581123352, "learning_rate": 2.9745444760991283e-06, "loss": 1.3373, "step": 6454 }, { "epoch": 0.924652628563243, "grad_norm": 0.9636121988296509, "learning_rate": 2.963322626088405e-06, "loss": 1.3639, "step": 6455 }, { "epoch": 0.9247958745165449, "grad_norm": 1.2030967473983765, "learning_rate": 2.9521216656730242e-06, "loss": 1.4641, "step": 6456 }, { "epoch": 0.9249391204698467, "grad_norm": 1.2235599756240845, "learning_rate": 2.9409415972642905e-06, "loss": 1.4006, "step": 6457 }, { "epoch": 0.9250823664231486, "grad_norm": 1.1225051879882812, "learning_rate": 2.9297824232689895e-06, "loss": 1.4989, "step": 6458 }, { "epoch": 0.9252256123764504, "grad_norm": 1.066063404083252, "learning_rate": 2.9186441460894333e-06, "loss": 1.3654, "step": 6459 }, { "epoch": 0.9253688583297521, "grad_norm": 1.2072542905807495, "learning_rate": 2.9075267681233697e-06, "loss": 1.2892, "step": 6460 }, { "epoch": 0.925512104283054, "grad_norm": 1.2591460943222046, "learning_rate": 2.896430291764152e-06, "loss": 1.3125, "step": 6461 }, { "epoch": 0.9256553502363558, "grad_norm": 1.2391941547393799, "learning_rate": 2.885354719400557e-06, "loss": 1.3301, "step": 6462 }, { "epoch": 0.9257985961896577, "grad_norm": 1.0523512363433838, "learning_rate": 2.8743000534168675e-06, "loss": 1.5114, "step": 6463 }, { "epoch": 0.9259418421429595, "grad_norm": 0.9606161713600159, "learning_rate": 2.863266296192879e-06, "loss": 1.5302, "step": 6464 }, { "epoch": 0.9260850880962613, "grad_norm": 0.963270366191864, "learning_rate": 2.8522534501039035e-06, "loss": 1.3593, "step": 6465 }, { "epoch": 0.9262283340495631, "grad_norm": 1.2613978385925293, "learning_rate": 2.8412615175207324e-06, "loss": 1.4532, "step": 6466 }, { "epoch": 0.9263715800028649, "grad_norm": 0.9834369421005249, "learning_rate": 2.8302905008096403e-06, "loss": 1.3304, "step": 6467 }, { "epoch": 0.9265148259561667, "grad_norm": 1.0687365531921387, "learning_rate": 2.8193404023324376e-06, "loss": 1.4737, "step": 6468 }, { "epoch": 0.9266580719094686, "grad_norm": 1.2179547548294067, "learning_rate": 2.8084112244464056e-06, "loss": 1.425, "step": 6469 }, { "epoch": 0.9268013178627704, "grad_norm": 1.090914011001587, "learning_rate": 2.7975029695043064e-06, "loss": 1.3174, "step": 6470 }, { "epoch": 0.9269445638160722, "grad_norm": 1.0008254051208496, "learning_rate": 2.7866156398544176e-06, "loss": 1.3894, "step": 6471 }, { "epoch": 0.927087809769374, "grad_norm": 1.2196687459945679, "learning_rate": 2.7757492378405414e-06, "loss": 1.303, "step": 6472 }, { "epoch": 0.9272310557226758, "grad_norm": 1.0364189147949219, "learning_rate": 2.7649037658019183e-06, "loss": 1.3859, "step": 6473 }, { "epoch": 0.9273743016759777, "grad_norm": 1.0551917552947998, "learning_rate": 2.754079226073325e-06, "loss": 1.5722, "step": 6474 }, { "epoch": 0.9275175476292795, "grad_norm": 0.9522950053215027, "learning_rate": 2.7432756209850084e-06, "loss": 1.4138, "step": 6475 }, { "epoch": 0.9276607935825812, "grad_norm": 1.098632574081421, "learning_rate": 2.7324929528627195e-06, "loss": 1.4097, "step": 6476 }, { "epoch": 0.9278040395358831, "grad_norm": 0.9926488995552063, "learning_rate": 2.7217312240277127e-06, "loss": 1.2624, "step": 6477 }, { "epoch": 0.9279472854891849, "grad_norm": 1.0938059091567993, "learning_rate": 2.71099043679669e-06, "loss": 1.3838, "step": 6478 }, { "epoch": 0.9280905314424868, "grad_norm": 1.2125778198242188, "learning_rate": 2.7002705934819018e-06, "loss": 1.2383, "step": 6479 }, { "epoch": 0.9282337773957886, "grad_norm": 1.1910125017166138, "learning_rate": 2.68957169639108e-06, "loss": 1.3472, "step": 6480 }, { "epoch": 0.9283770233490903, "grad_norm": 1.1332073211669922, "learning_rate": 2.6788937478273934e-06, "loss": 1.4529, "step": 6481 }, { "epoch": 0.9285202693023922, "grad_norm": 1.0849745273590088, "learning_rate": 2.6682367500895587e-06, "loss": 1.3518, "step": 6482 }, { "epoch": 0.928663515255694, "grad_norm": 1.0638624429702759, "learning_rate": 2.6576007054717746e-06, "loss": 1.4478, "step": 6483 }, { "epoch": 0.9288067612089959, "grad_norm": 1.1152218580245972, "learning_rate": 2.6469856162637086e-06, "loss": 1.5815, "step": 6484 }, { "epoch": 0.9289500071622977, "grad_norm": 1.0590484142303467, "learning_rate": 2.6363914847505113e-06, "loss": 1.3776, "step": 6485 }, { "epoch": 0.9290932531155995, "grad_norm": 1.0281152725219727, "learning_rate": 2.625818313212869e-06, "loss": 1.4886, "step": 6486 }, { "epoch": 0.9292364990689013, "grad_norm": 1.170256495475769, "learning_rate": 2.6152661039268943e-06, "loss": 1.4909, "step": 6487 }, { "epoch": 0.9293797450222031, "grad_norm": 1.1871517896652222, "learning_rate": 2.6047348591642483e-06, "loss": 1.381, "step": 6488 }, { "epoch": 0.929522990975505, "grad_norm": 1.199926733970642, "learning_rate": 2.5942245811920065e-06, "loss": 1.5402, "step": 6489 }, { "epoch": 0.9296662369288068, "grad_norm": 1.009153127670288, "learning_rate": 2.5837352722728026e-06, "loss": 1.5732, "step": 6490 }, { "epoch": 0.9298094828821086, "grad_norm": 0.829253077507019, "learning_rate": 2.57326693466472e-06, "loss": 1.4724, "step": 6491 }, { "epoch": 0.9299527288354104, "grad_norm": 1.084157109260559, "learning_rate": 2.562819570621322e-06, "loss": 1.4095, "step": 6492 }, { "epoch": 0.9300959747887122, "grad_norm": 1.1585246324539185, "learning_rate": 2.552393182391677e-06, "loss": 1.3877, "step": 6493 }, { "epoch": 0.930239220742014, "grad_norm": 0.9959848523139954, "learning_rate": 2.5419877722203333e-06, "loss": 1.3872, "step": 6494 }, { "epoch": 0.9303824666953159, "grad_norm": 1.3616734743118286, "learning_rate": 2.5316033423472997e-06, "loss": 1.3783, "step": 6495 }, { "epoch": 0.9305257126486177, "grad_norm": 1.2964977025985718, "learning_rate": 2.5212398950081096e-06, "loss": 1.3801, "step": 6496 }, { "epoch": 0.9306689586019194, "grad_norm": 1.019456386566162, "learning_rate": 2.510897432433734e-06, "loss": 1.3676, "step": 6497 }, { "epoch": 0.9308122045552213, "grad_norm": 0.9017200469970703, "learning_rate": 2.5005759568506704e-06, "loss": 1.3527, "step": 6498 }, { "epoch": 0.9309554505085231, "grad_norm": 0.9133973717689514, "learning_rate": 2.490275470480863e-06, "loss": 1.4446, "step": 6499 }, { "epoch": 0.931098696461825, "grad_norm": 0.961898922920227, "learning_rate": 2.479995975541749e-06, "loss": 1.4438, "step": 6500 }, { "epoch": 0.9312419424151268, "grad_norm": 1.2371913194656372, "learning_rate": 2.4697374742462698e-06, "loss": 1.2757, "step": 6501 }, { "epoch": 0.9313851883684285, "grad_norm": 1.0603811740875244, "learning_rate": 2.4594999688028032e-06, "loss": 1.3934, "step": 6502 }, { "epoch": 0.9315284343217304, "grad_norm": 0.9528247714042664, "learning_rate": 2.4492834614152414e-06, "loss": 1.2006, "step": 6503 }, { "epoch": 0.9316716802750322, "grad_norm": 1.3816990852355957, "learning_rate": 2.439087954282948e-06, "loss": 1.4039, "step": 6504 }, { "epoch": 0.9318149262283341, "grad_norm": 1.1050955057144165, "learning_rate": 2.428913449600756e-06, "loss": 1.1844, "step": 6505 }, { "epoch": 0.9319581721816359, "grad_norm": 1.1582280397415161, "learning_rate": 2.418759949558991e-06, "loss": 1.4193, "step": 6506 }, { "epoch": 0.9321014181349377, "grad_norm": 1.1322112083435059, "learning_rate": 2.4086274563434488e-06, "loss": 1.4639, "step": 6507 }, { "epoch": 0.9322446640882395, "grad_norm": 1.1956231594085693, "learning_rate": 2.3985159721353956e-06, "loss": 1.3419, "step": 6508 }, { "epoch": 0.9323879100415413, "grad_norm": 0.9906167387962341, "learning_rate": 2.388425499111613e-06, "loss": 1.5061, "step": 6509 }, { "epoch": 0.9325311559948432, "grad_norm": 1.0036982297897339, "learning_rate": 2.378356039444285e-06, "loss": 1.3708, "step": 6510 }, { "epoch": 0.932674401948145, "grad_norm": 0.9920417666435242, "learning_rate": 2.3683075953011558e-06, "loss": 1.6787, "step": 6511 }, { "epoch": 0.9328176479014468, "grad_norm": 1.249273657798767, "learning_rate": 2.3582801688453948e-06, "loss": 1.4878, "step": 6512 }, { "epoch": 0.9329608938547486, "grad_norm": 0.9275979399681091, "learning_rate": 2.348273762235642e-06, "loss": 1.3736, "step": 6513 }, { "epoch": 0.9331041398080504, "grad_norm": 1.1471542119979858, "learning_rate": 2.3382883776260524e-06, "loss": 1.4489, "step": 6514 }, { "epoch": 0.9332473857613522, "grad_norm": 1.1553690433502197, "learning_rate": 2.3283240171662167e-06, "loss": 1.3614, "step": 6515 }, { "epoch": 0.9333906317146541, "grad_norm": 1.105751633644104, "learning_rate": 2.318380683001231e-06, "loss": 1.3318, "step": 6516 }, { "epoch": 0.9335338776679559, "grad_norm": 1.143595814704895, "learning_rate": 2.3084583772716275e-06, "loss": 1.5478, "step": 6517 }, { "epoch": 0.9336771236212577, "grad_norm": 1.1402645111083984, "learning_rate": 2.2985571021134524e-06, "loss": 1.4338, "step": 6518 }, { "epoch": 0.9338203695745595, "grad_norm": 1.2224313020706177, "learning_rate": 2.288676859658212e-06, "loss": 1.3265, "step": 6519 }, { "epoch": 0.9339636155278613, "grad_norm": 1.1123749017715454, "learning_rate": 2.27881765203285e-06, "loss": 1.4388, "step": 6520 }, { "epoch": 0.9341068614811632, "grad_norm": 1.0451598167419434, "learning_rate": 2.268979481359812e-06, "loss": 1.4712, "step": 6521 }, { "epoch": 0.934250107434465, "grad_norm": 1.1520843505859375, "learning_rate": 2.259162349757038e-06, "loss": 1.3638, "step": 6522 }, { "epoch": 0.9343933533877667, "grad_norm": 0.9486491680145264, "learning_rate": 2.249366259337893e-06, "loss": 1.437, "step": 6523 }, { "epoch": 0.9345365993410686, "grad_norm": 1.0022501945495605, "learning_rate": 2.239591212211245e-06, "loss": 1.3158, "step": 6524 }, { "epoch": 0.9346798452943704, "grad_norm": 1.1213300228118896, "learning_rate": 2.2298372104814115e-06, "loss": 1.294, "step": 6525 }, { "epoch": 0.9348230912476723, "grad_norm": 1.2453380823135376, "learning_rate": 2.22010425624819e-06, "loss": 1.4299, "step": 6526 }, { "epoch": 0.9349663372009741, "grad_norm": 0.9873899221420288, "learning_rate": 2.2103923516068605e-06, "loss": 1.3196, "step": 6527 }, { "epoch": 0.9351095831542758, "grad_norm": 0.9710806012153625, "learning_rate": 2.2007014986481167e-06, "loss": 1.3067, "step": 6528 }, { "epoch": 0.9352528291075777, "grad_norm": 1.000098466873169, "learning_rate": 2.1910316994581904e-06, "loss": 1.4584, "step": 6529 }, { "epoch": 0.9353960750608795, "grad_norm": 1.0472301244735718, "learning_rate": 2.1813829561187604e-06, "loss": 1.2174, "step": 6530 }, { "epoch": 0.9355393210141814, "grad_norm": 1.1677483320236206, "learning_rate": 2.1717552707069323e-06, "loss": 1.2664, "step": 6531 }, { "epoch": 0.9356825669674832, "grad_norm": 0.9195520877838135, "learning_rate": 2.1621486452953144e-06, "loss": 1.5163, "step": 6532 }, { "epoch": 0.935825812920785, "grad_norm": 1.0236012935638428, "learning_rate": 2.152563081951997e-06, "loss": 1.2329, "step": 6533 }, { "epoch": 0.9359690588740868, "grad_norm": 1.543801188468933, "learning_rate": 2.1429985827404853e-06, "loss": 1.295, "step": 6534 }, { "epoch": 0.9361123048273886, "grad_norm": 1.1117914915084839, "learning_rate": 2.1334551497197987e-06, "loss": 1.4779, "step": 6535 }, { "epoch": 0.9362555507806904, "grad_norm": 1.0583205223083496, "learning_rate": 2.1239327849444045e-06, "loss": 1.2041, "step": 6536 }, { "epoch": 0.9363987967339923, "grad_norm": 1.1168370246887207, "learning_rate": 2.1144314904642195e-06, "loss": 1.3311, "step": 6537 }, { "epoch": 0.9365420426872941, "grad_norm": 1.0833189487457275, "learning_rate": 2.104951268324651e-06, "loss": 1.4011, "step": 6538 }, { "epoch": 0.9366852886405959, "grad_norm": 1.1070337295532227, "learning_rate": 2.095492120566522e-06, "loss": 1.2256, "step": 6539 }, { "epoch": 0.9368285345938977, "grad_norm": 1.0879265069961548, "learning_rate": 2.0860540492262047e-06, "loss": 1.2924, "step": 6540 }, { "epoch": 0.9369717805471995, "grad_norm": 1.113938570022583, "learning_rate": 2.0766370563354508e-06, "loss": 1.4909, "step": 6541 }, { "epoch": 0.9371150265005014, "grad_norm": 0.9503293037414551, "learning_rate": 2.0672411439215165e-06, "loss": 1.3318, "step": 6542 }, { "epoch": 0.9372582724538032, "grad_norm": 1.1616195440292358, "learning_rate": 2.0578663140070954e-06, "loss": 1.3118, "step": 6543 }, { "epoch": 0.937401518407105, "grad_norm": 1.1152852773666382, "learning_rate": 2.048512568610361e-06, "loss": 1.2262, "step": 6544 }, { "epoch": 0.9375447643604068, "grad_norm": 1.106892704963684, "learning_rate": 2.0391799097449593e-06, "loss": 1.1471, "step": 6545 }, { "epoch": 0.9376880103137086, "grad_norm": 1.2196919918060303, "learning_rate": 2.02986833941996e-06, "loss": 1.4286, "step": 6546 }, { "epoch": 0.9378312562670105, "grad_norm": 1.0149511098861694, "learning_rate": 2.020577859639927e-06, "loss": 1.2843, "step": 6547 }, { "epoch": 0.9379745022203123, "grad_norm": 1.0943989753723145, "learning_rate": 2.0113084724048715e-06, "loss": 1.2628, "step": 6548 }, { "epoch": 0.938117748173614, "grad_norm": 0.9725356101989746, "learning_rate": 2.0020601797102523e-06, "loss": 1.4806, "step": 6549 }, { "epoch": 0.9382609941269159, "grad_norm": 1.04365074634552, "learning_rate": 1.9928329835469996e-06, "loss": 1.4893, "step": 6550 }, { "epoch": 0.9384042400802177, "grad_norm": 0.9421520233154297, "learning_rate": 1.983626885901513e-06, "loss": 1.4679, "step": 6551 }, { "epoch": 0.9385474860335196, "grad_norm": 1.0554935932159424, "learning_rate": 1.974441888755629e-06, "loss": 1.3595, "step": 6552 }, { "epoch": 0.9386907319868214, "grad_norm": 1.0590242147445679, "learning_rate": 1.965277994086645e-06, "loss": 1.3376, "step": 6553 }, { "epoch": 0.9388339779401232, "grad_norm": 0.9728385210037231, "learning_rate": 1.9561352038673263e-06, "loss": 1.3972, "step": 6554 }, { "epoch": 0.938977223893425, "grad_norm": 1.1325448751449585, "learning_rate": 1.947013520065899e-06, "loss": 1.4875, "step": 6555 }, { "epoch": 0.9391204698467268, "grad_norm": 1.1805099248886108, "learning_rate": 1.9379129446460253e-06, "loss": 1.2584, "step": 6556 }, { "epoch": 0.9392637158000287, "grad_norm": 1.02956223487854, "learning_rate": 1.9288334795668163e-06, "loss": 1.4759, "step": 6557 }, { "epoch": 0.9394069617533305, "grad_norm": 1.0579255819320679, "learning_rate": 1.9197751267828855e-06, "loss": 1.4481, "step": 6558 }, { "epoch": 0.9395502077066323, "grad_norm": 1.195554494857788, "learning_rate": 1.910737888244274e-06, "loss": 1.448, "step": 6559 }, { "epoch": 0.9396934536599341, "grad_norm": 1.2533451318740845, "learning_rate": 1.901721765896447e-06, "loss": 1.3288, "step": 6560 }, { "epoch": 0.9398366996132359, "grad_norm": 0.9402534365653992, "learning_rate": 1.8927267616803634e-06, "loss": 1.3684, "step": 6561 }, { "epoch": 0.9399799455665377, "grad_norm": 1.0570292472839355, "learning_rate": 1.883752877532452e-06, "loss": 1.444, "step": 6562 }, { "epoch": 0.9401231915198396, "grad_norm": 1.218531847000122, "learning_rate": 1.874800115384523e-06, "loss": 1.2587, "step": 6563 }, { "epoch": 0.9402664374731414, "grad_norm": 0.9667032957077026, "learning_rate": 1.8658684771639234e-06, "loss": 1.5308, "step": 6564 }, { "epoch": 0.9404096834264432, "grad_norm": 1.2847278118133545, "learning_rate": 1.8569579647933933e-06, "loss": 1.5103, "step": 6565 }, { "epoch": 0.940552929379745, "grad_norm": 1.2722854614257812, "learning_rate": 1.8480685801911424e-06, "loss": 1.4827, "step": 6566 }, { "epoch": 0.9406961753330468, "grad_norm": 1.0711419582366943, "learning_rate": 1.8392003252708622e-06, "loss": 1.3001, "step": 6567 }, { "epoch": 0.9408394212863487, "grad_norm": 1.0763605833053589, "learning_rate": 1.8303532019416258e-06, "loss": 1.3421, "step": 6568 }, { "epoch": 0.9409826672396505, "grad_norm": 0.9694857001304626, "learning_rate": 1.8215272121080317e-06, "loss": 1.35, "step": 6569 }, { "epoch": 0.9411259131929522, "grad_norm": 1.085087776184082, "learning_rate": 1.8127223576701046e-06, "loss": 1.28, "step": 6570 }, { "epoch": 0.9412691591462541, "grad_norm": 1.1587072610855103, "learning_rate": 1.8039386405232728e-06, "loss": 1.2014, "step": 6571 }, { "epoch": 0.9414124050995559, "grad_norm": 1.0818008184432983, "learning_rate": 1.79517606255849e-06, "loss": 1.3786, "step": 6572 }, { "epoch": 0.9415556510528578, "grad_norm": 0.9864383935928345, "learning_rate": 1.7864346256621033e-06, "loss": 1.4213, "step": 6573 }, { "epoch": 0.9416988970061596, "grad_norm": 1.424342155456543, "learning_rate": 1.7777143317159406e-06, "loss": 1.3157, "step": 6574 }, { "epoch": 0.9418421429594614, "grad_norm": 1.0142978429794312, "learning_rate": 1.7690151825972446e-06, "loss": 1.3288, "step": 6575 }, { "epoch": 0.9419853889127632, "grad_norm": 1.090136170387268, "learning_rate": 1.7603371801787505e-06, "loss": 1.3036, "step": 6576 }, { "epoch": 0.942128634866065, "grad_norm": 1.0465911626815796, "learning_rate": 1.7516803263286086e-06, "loss": 1.4323, "step": 6577 }, { "epoch": 0.9422718808193669, "grad_norm": 0.8737765550613403, "learning_rate": 1.743044622910417e-06, "loss": 1.4544, "step": 6578 }, { "epoch": 0.9424151267726687, "grad_norm": 1.2587758302688599, "learning_rate": 1.734430071783244e-06, "loss": 1.4904, "step": 6579 }, { "epoch": 0.9425583727259705, "grad_norm": 1.3229172229766846, "learning_rate": 1.7258366748015842e-06, "loss": 1.3992, "step": 6580 }, { "epoch": 0.9427016186792723, "grad_norm": 1.1931192874908447, "learning_rate": 1.7172644338153686e-06, "loss": 1.4733, "step": 6581 }, { "epoch": 0.9428448646325741, "grad_norm": 1.0861363410949707, "learning_rate": 1.7087133506699992e-06, "loss": 1.329, "step": 6582 }, { "epoch": 0.942988110585876, "grad_norm": 1.0513147115707397, "learning_rate": 1.7001834272063255e-06, "loss": 1.4861, "step": 6583 }, { "epoch": 0.9431313565391778, "grad_norm": 1.1442103385925293, "learning_rate": 1.6916746652606119e-06, "loss": 1.5508, "step": 6584 }, { "epoch": 0.9432746024924796, "grad_norm": 1.0573018789291382, "learning_rate": 1.6831870666645822e-06, "loss": 1.2921, "step": 6585 }, { "epoch": 0.9434178484457814, "grad_norm": 1.0331250429153442, "learning_rate": 1.6747206332454191e-06, "loss": 1.3437, "step": 6586 }, { "epoch": 0.9435610943990832, "grad_norm": 1.190805196762085, "learning_rate": 1.6662753668257314e-06, "loss": 1.2865, "step": 6587 }, { "epoch": 0.943704340352385, "grad_norm": 1.1967812776565552, "learning_rate": 1.657851269223587e-06, "loss": 1.2588, "step": 6588 }, { "epoch": 0.9438475863056869, "grad_norm": 1.2354650497436523, "learning_rate": 1.6494483422524466e-06, "loss": 1.2766, "step": 6589 }, { "epoch": 0.9439908322589887, "grad_norm": 0.9557923674583435, "learning_rate": 1.641066587721296e-06, "loss": 1.193, "step": 6590 }, { "epoch": 0.9441340782122905, "grad_norm": 1.3335047960281372, "learning_rate": 1.6327060074345147e-06, "loss": 1.4057, "step": 6591 }, { "epoch": 0.9442773241655923, "grad_norm": 1.118815541267395, "learning_rate": 1.624366603191907e-06, "loss": 1.1424, "step": 6592 }, { "epoch": 0.9444205701188941, "grad_norm": 0.9733049273490906, "learning_rate": 1.616048376788748e-06, "loss": 1.402, "step": 6593 }, { "epoch": 0.944563816072196, "grad_norm": 1.3634076118469238, "learning_rate": 1.6077513300157499e-06, "loss": 1.3282, "step": 6594 }, { "epoch": 0.9447070620254978, "grad_norm": 1.0222326517105103, "learning_rate": 1.5994754646590615e-06, "loss": 1.2558, "step": 6595 }, { "epoch": 0.9448503079787997, "grad_norm": 0.9995129704475403, "learning_rate": 1.591220782500269e-06, "loss": 1.4475, "step": 6596 }, { "epoch": 0.9449935539321014, "grad_norm": 1.0908689498901367, "learning_rate": 1.5829872853163952e-06, "loss": 1.3801, "step": 6597 }, { "epoch": 0.9451367998854032, "grad_norm": 1.081181287765503, "learning_rate": 1.5747749748799334e-06, "loss": 1.2232, "step": 6598 }, { "epoch": 0.9452800458387051, "grad_norm": 0.9398199319839478, "learning_rate": 1.5665838529587695e-06, "loss": 1.3441, "step": 6599 }, { "epoch": 0.9454232917920069, "grad_norm": 1.1726338863372803, "learning_rate": 1.5584139213162374e-06, "loss": 1.29, "step": 6600 }, { "epoch": 0.9455665377453087, "grad_norm": 0.967519223690033, "learning_rate": 1.5502651817111524e-06, "loss": 1.5401, "step": 6601 }, { "epoch": 0.9457097836986105, "grad_norm": 1.0717920064926147, "learning_rate": 1.5421376358977224e-06, "loss": 1.4192, "step": 6602 }, { "epoch": 0.9458530296519123, "grad_norm": 1.906010389328003, "learning_rate": 1.5340312856255922e-06, "loss": 1.2618, "step": 6603 }, { "epoch": 0.9459962756052142, "grad_norm": 1.0776267051696777, "learning_rate": 1.5259461326398772e-06, "loss": 1.5682, "step": 6604 }, { "epoch": 0.946139521558516, "grad_norm": 1.0738922357559204, "learning_rate": 1.517882178681107e-06, "loss": 1.3782, "step": 6605 }, { "epoch": 0.9462827675118178, "grad_norm": 0.9170629978179932, "learning_rate": 1.50983942548526e-06, "loss": 1.3167, "step": 6606 }, { "epoch": 0.9464260134651196, "grad_norm": 1.1050288677215576, "learning_rate": 1.5018178747837174e-06, "loss": 1.2376, "step": 6607 }, { "epoch": 0.9465692594184214, "grad_norm": 1.2266547679901123, "learning_rate": 1.4938175283033319e-06, "loss": 1.3362, "step": 6608 }, { "epoch": 0.9467125053717232, "grad_norm": 1.1739236116409302, "learning_rate": 1.4858383877664029e-06, "loss": 1.367, "step": 6609 }, { "epoch": 0.9468557513250251, "grad_norm": 0.9569011330604553, "learning_rate": 1.477880454890601e-06, "loss": 1.3771, "step": 6610 }, { "epoch": 0.9469989972783269, "grad_norm": 1.0979208946228027, "learning_rate": 1.4699437313891007e-06, "loss": 1.439, "step": 6611 }, { "epoch": 0.9471422432316287, "grad_norm": 1.0229387283325195, "learning_rate": 1.4620282189704793e-06, "loss": 1.5583, "step": 6612 }, { "epoch": 0.9472854891849305, "grad_norm": 0.9753866195678711, "learning_rate": 1.4541339193387404e-06, "loss": 1.2007, "step": 6613 }, { "epoch": 0.9474287351382323, "grad_norm": 1.0789514780044556, "learning_rate": 1.446260834193336e-06, "loss": 1.5225, "step": 6614 }, { "epoch": 0.9475719810915342, "grad_norm": 1.0734541416168213, "learning_rate": 1.4384089652291543e-06, "loss": 1.4904, "step": 6615 }, { "epoch": 0.947715227044836, "grad_norm": 1.1250686645507812, "learning_rate": 1.4305783141364992e-06, "loss": 1.4637, "step": 6616 }, { "epoch": 0.9478584729981379, "grad_norm": 1.107622504234314, "learning_rate": 1.422768882601122e-06, "loss": 1.4989, "step": 6617 }, { "epoch": 0.9480017189514396, "grad_norm": 0.9898762702941895, "learning_rate": 1.4149806723041892e-06, "loss": 1.3872, "step": 6618 }, { "epoch": 0.9481449649047414, "grad_norm": 1.1569151878356934, "learning_rate": 1.407213684922315e-06, "loss": 1.3477, "step": 6619 }, { "epoch": 0.9482882108580433, "grad_norm": 1.061189889907837, "learning_rate": 1.3994679221275509e-06, "loss": 1.3899, "step": 6620 }, { "epoch": 0.9484314568113451, "grad_norm": 1.0550694465637207, "learning_rate": 1.391743385587363e-06, "loss": 1.4333, "step": 6621 }, { "epoch": 0.948574702764647, "grad_norm": 1.4299442768096924, "learning_rate": 1.3840400769646322e-06, "loss": 1.3545, "step": 6622 }, { "epoch": 0.9487179487179487, "grad_norm": 1.113505482673645, "learning_rate": 1.376357997917721e-06, "loss": 1.2025, "step": 6623 }, { "epoch": 0.9488611946712505, "grad_norm": 0.9943010210990906, "learning_rate": 1.3686971501003621e-06, "loss": 1.4224, "step": 6624 }, { "epoch": 0.9490044406245524, "grad_norm": 0.9991193413734436, "learning_rate": 1.3610575351617693e-06, "loss": 1.2803, "step": 6625 }, { "epoch": 0.9491476865778542, "grad_norm": 1.141183853149414, "learning_rate": 1.3534391547465608e-06, "loss": 1.299, "step": 6626 }, { "epoch": 0.949290932531156, "grad_norm": 1.2125797271728516, "learning_rate": 1.34584201049478e-06, "loss": 1.4634, "step": 6627 }, { "epoch": 0.9494341784844578, "grad_norm": 1.142867088317871, "learning_rate": 1.3382661040418964e-06, "loss": 1.4659, "step": 6628 }, { "epoch": 0.9495774244377596, "grad_norm": 1.1054949760437012, "learning_rate": 1.3307114370188057e-06, "loss": 1.3877, "step": 6629 }, { "epoch": 0.9497206703910615, "grad_norm": 1.033025860786438, "learning_rate": 1.3231780110518844e-06, "loss": 1.5872, "step": 6630 }, { "epoch": 0.9498639163443633, "grad_norm": 0.9341566562652588, "learning_rate": 1.3156658277628463e-06, "loss": 1.3833, "step": 6631 }, { "epoch": 0.9500071622976651, "grad_norm": 1.021216630935669, "learning_rate": 1.3081748887689094e-06, "loss": 1.337, "step": 6632 }, { "epoch": 0.9501504082509669, "grad_norm": 0.95775306224823, "learning_rate": 1.3007051956826611e-06, "loss": 1.3069, "step": 6633 }, { "epoch": 0.9502936542042687, "grad_norm": 0.963055431842804, "learning_rate": 1.2932567501121707e-06, "loss": 1.4067, "step": 6634 }, { "epoch": 0.9504369001575705, "grad_norm": 1.080165147781372, "learning_rate": 1.2858295536608778e-06, "loss": 1.3757, "step": 6635 }, { "epoch": 0.9505801461108724, "grad_norm": 0.9608297944068909, "learning_rate": 1.2784236079276924e-06, "loss": 1.5086, "step": 6636 }, { "epoch": 0.9507233920641742, "grad_norm": 1.15897798538208, "learning_rate": 1.271038914506928e-06, "loss": 1.3479, "step": 6637 }, { "epoch": 0.950866638017476, "grad_norm": 1.2203125953674316, "learning_rate": 1.263675474988324e-06, "loss": 1.4616, "step": 6638 }, { "epoch": 0.9510098839707778, "grad_norm": 1.0418373346328735, "learning_rate": 1.2563332909570346e-06, "loss": 1.5018, "step": 6639 }, { "epoch": 0.9511531299240796, "grad_norm": 1.0435278415679932, "learning_rate": 1.2490123639936625e-06, "loss": 1.3865, "step": 6640 }, { "epoch": 0.9512963758773815, "grad_norm": 1.0926727056503296, "learning_rate": 1.2417126956742241e-06, "loss": 1.391, "step": 6641 }, { "epoch": 0.9514396218306833, "grad_norm": 0.9789139032363892, "learning_rate": 1.234434287570163e-06, "loss": 1.4996, "step": 6642 }, { "epoch": 0.9515828677839852, "grad_norm": 1.1839205026626587, "learning_rate": 1.2271771412483146e-06, "loss": 1.4096, "step": 6643 }, { "epoch": 0.9517261137372869, "grad_norm": 1.034575343132019, "learning_rate": 1.2199412582709956e-06, "loss": 1.3697, "step": 6644 }, { "epoch": 0.9518693596905887, "grad_norm": 0.9773863554000854, "learning_rate": 1.2127266401958826e-06, "loss": 1.4784, "step": 6645 }, { "epoch": 0.9520126056438906, "grad_norm": 1.2207194566726685, "learning_rate": 1.2055332885761327e-06, "loss": 1.4042, "step": 6646 }, { "epoch": 0.9521558515971924, "grad_norm": 1.030057430267334, "learning_rate": 1.1983612049602744e-06, "loss": 1.4045, "step": 6647 }, { "epoch": 0.9522990975504942, "grad_norm": 1.1055970191955566, "learning_rate": 1.1912103908922945e-06, "loss": 1.4057, "step": 6648 }, { "epoch": 0.952442343503796, "grad_norm": 1.0934616327285767, "learning_rate": 1.1840808479115727e-06, "loss": 1.4209, "step": 6649 }, { "epoch": 0.9525855894570978, "grad_norm": 1.080164909362793, "learning_rate": 1.176972577552915e-06, "loss": 1.4804, "step": 6650 }, { "epoch": 0.9527288354103997, "grad_norm": 0.9830626845359802, "learning_rate": 1.169885581346597e-06, "loss": 1.4227, "step": 6651 }, { "epoch": 0.9528720813637015, "grad_norm": 1.0269112586975098, "learning_rate": 1.1628198608182429e-06, "loss": 1.4547, "step": 6652 }, { "epoch": 0.9530153273170033, "grad_norm": 1.2169865369796753, "learning_rate": 1.1557754174889247e-06, "loss": 1.2574, "step": 6653 }, { "epoch": 0.9531585732703051, "grad_norm": 1.085243582725525, "learning_rate": 1.1487522528751404e-06, "loss": 1.4427, "step": 6654 }, { "epoch": 0.9533018192236069, "grad_norm": 1.0422412157058716, "learning_rate": 1.1417503684888142e-06, "loss": 1.3612, "step": 6655 }, { "epoch": 0.9534450651769087, "grad_norm": 0.9516494870185852, "learning_rate": 1.1347697658372847e-06, "loss": 1.3363, "step": 6656 }, { "epoch": 0.9535883111302106, "grad_norm": 0.9706204533576965, "learning_rate": 1.127810446423261e-06, "loss": 1.3554, "step": 6657 }, { "epoch": 0.9537315570835124, "grad_norm": 1.0176167488098145, "learning_rate": 1.1208724117449554e-06, "loss": 1.5602, "step": 6658 }, { "epoch": 0.9538748030368142, "grad_norm": 1.0628103017807007, "learning_rate": 1.1139556632959515e-06, "loss": 1.3962, "step": 6659 }, { "epoch": 0.954018048990116, "grad_norm": 0.9588499665260315, "learning_rate": 1.1070602025652355e-06, "loss": 1.4275, "step": 6660 }, { "epoch": 0.9541612949434178, "grad_norm": 1.1299690008163452, "learning_rate": 1.1001860310372314e-06, "loss": 1.3483, "step": 6661 }, { "epoch": 0.9543045408967197, "grad_norm": 0.9739903211593628, "learning_rate": 1.0933331501917998e-06, "loss": 1.5693, "step": 6662 }, { "epoch": 0.9544477868500215, "grad_norm": 0.9503809809684753, "learning_rate": 1.0865015615041607e-06, "loss": 1.4149, "step": 6663 }, { "epoch": 0.9545910328033234, "grad_norm": 1.2064324617385864, "learning_rate": 1.0796912664450265e-06, "loss": 1.4276, "step": 6664 }, { "epoch": 0.9547342787566251, "grad_norm": 0.9394369721412659, "learning_rate": 1.0729022664804467e-06, "loss": 1.5726, "step": 6665 }, { "epoch": 0.9548775247099269, "grad_norm": 1.1692692041397095, "learning_rate": 1.0661345630719522e-06, "loss": 1.3959, "step": 6666 }, { "epoch": 0.9550207706632288, "grad_norm": 0.9987855553627014, "learning_rate": 1.0593881576764664e-06, "loss": 1.3795, "step": 6667 }, { "epoch": 0.9551640166165306, "grad_norm": 1.0725371837615967, "learning_rate": 1.0526630517462833e-06, "loss": 1.55, "step": 6668 }, { "epoch": 0.9553072625698324, "grad_norm": 1.0539506673812866, "learning_rate": 1.045959246729189e-06, "loss": 1.315, "step": 6669 }, { "epoch": 0.9554505085231342, "grad_norm": 1.0855098962783813, "learning_rate": 1.0392767440683516e-06, "loss": 1.4658, "step": 6670 }, { "epoch": 0.955593754476436, "grad_norm": 1.0522102117538452, "learning_rate": 1.03261554520232e-06, "loss": 1.3525, "step": 6671 }, { "epoch": 0.9557370004297379, "grad_norm": 1.0024594068527222, "learning_rate": 1.0259756515651032e-06, "loss": 1.5479, "step": 6672 }, { "epoch": 0.9558802463830397, "grad_norm": 1.199232816696167, "learning_rate": 1.0193570645860905e-06, "loss": 1.2984, "step": 6673 }, { "epoch": 0.9560234923363415, "grad_norm": 0.950915515422821, "learning_rate": 1.0127597856901205e-06, "loss": 1.361, "step": 6674 }, { "epoch": 0.9561667382896433, "grad_norm": 1.133409023284912, "learning_rate": 1.0061838162974235e-06, "loss": 1.3439, "step": 6675 }, { "epoch": 0.9563099842429451, "grad_norm": 1.2114832401275635, "learning_rate": 9.996291578236228e-07, "loss": 1.2736, "step": 6676 }, { "epoch": 0.956453230196247, "grad_norm": 1.8214119672775269, "learning_rate": 9.930958116797895e-07, "loss": 1.5906, "step": 6677 }, { "epoch": 0.9565964761495488, "grad_norm": 1.2845548391342163, "learning_rate": 9.865837792723875e-07, "loss": 1.3198, "step": 6678 }, { "epoch": 0.9567397221028506, "grad_norm": 1.0146381855010986, "learning_rate": 9.800930620032843e-07, "loss": 1.5795, "step": 6679 }, { "epoch": 0.9568829680561524, "grad_norm": 1.0424240827560425, "learning_rate": 9.736236612697957e-07, "loss": 1.3348, "step": 6680 }, { "epoch": 0.9570262140094542, "grad_norm": 1.1131378412246704, "learning_rate": 9.671755784646075e-07, "loss": 1.2732, "step": 6681 }, { "epoch": 0.957169459962756, "grad_norm": 0.9339991211891174, "learning_rate": 9.607488149758425e-07, "loss": 1.2275, "step": 6682 }, { "epoch": 0.9573127059160579, "grad_norm": 1.1991435289382935, "learning_rate": 9.543433721870055e-07, "loss": 1.2605, "step": 6683 }, { "epoch": 0.9574559518693597, "grad_norm": 0.9194568395614624, "learning_rate": 9.479592514770486e-07, "loss": 1.3574, "step": 6684 }, { "epoch": 0.9575991978226616, "grad_norm": 1.0245776176452637, "learning_rate": 9.415964542203059e-07, "loss": 1.3816, "step": 6685 }, { "epoch": 0.9577424437759633, "grad_norm": 0.9724869728088379, "learning_rate": 9.352549817865263e-07, "loss": 1.5024, "step": 6686 }, { "epoch": 0.9578856897292651, "grad_norm": 1.0989818572998047, "learning_rate": 9.289348355408734e-07, "loss": 1.3091, "step": 6687 }, { "epoch": 0.958028935682567, "grad_norm": 1.2178518772125244, "learning_rate": 9.226360168439363e-07, "loss": 1.4937, "step": 6688 }, { "epoch": 0.9581721816358688, "grad_norm": 0.9448843002319336, "learning_rate": 9.16358527051675e-07, "loss": 1.5251, "step": 6689 }, { "epoch": 0.9583154275891707, "grad_norm": 1.1422120332717896, "learning_rate": 9.101023675154751e-07, "loss": 1.382, "step": 6690 }, { "epoch": 0.9584586735424724, "grad_norm": 1.2850912809371948, "learning_rate": 9.038675395821594e-07, "loss": 1.3588, "step": 6691 }, { "epoch": 0.9586019194957742, "grad_norm": 0.9747809767723083, "learning_rate": 8.976540445938986e-07, "loss": 1.3269, "step": 6692 }, { "epoch": 0.9587451654490761, "grad_norm": 0.9804068803787231, "learning_rate": 8.914618838883226e-07, "loss": 1.2533, "step": 6693 }, { "epoch": 0.9588884114023779, "grad_norm": 1.092737078666687, "learning_rate": 8.852910587984542e-07, "loss": 1.7149, "step": 6694 }, { "epoch": 0.9590316573556797, "grad_norm": 0.9056750535964966, "learning_rate": 8.791415706526973e-07, "loss": 1.4214, "step": 6695 }, { "epoch": 0.9591749033089815, "grad_norm": 1.11558198928833, "learning_rate": 8.730134207749152e-07, "loss": 1.4036, "step": 6696 }, { "epoch": 0.9593181492622833, "grad_norm": 1.0779403448104858, "learning_rate": 8.669066104843304e-07, "loss": 1.2249, "step": 6697 }, { "epoch": 0.9594613952155852, "grad_norm": 1.0764225721359253, "learning_rate": 8.608211410955802e-07, "loss": 1.3402, "step": 6698 }, { "epoch": 0.959604641168887, "grad_norm": 1.0803821086883545, "learning_rate": 8.547570139187388e-07, "loss": 1.3921, "step": 6699 }, { "epoch": 0.9597478871221888, "grad_norm": 1.080828070640564, "learning_rate": 8.487142302592288e-07, "loss": 1.4322, "step": 6700 }, { "epoch": 0.9598911330754906, "grad_norm": 1.2060621976852417, "learning_rate": 8.426927914179428e-07, "loss": 1.5657, "step": 6701 }, { "epoch": 0.9600343790287924, "grad_norm": 1.0757522583007812, "learning_rate": 8.366926986911328e-07, "loss": 1.596, "step": 6702 }, { "epoch": 0.9601776249820942, "grad_norm": 1.2298121452331543, "learning_rate": 8.307139533704766e-07, "loss": 1.3426, "step": 6703 }, { "epoch": 0.9603208709353961, "grad_norm": 1.142180323600769, "learning_rate": 8.247565567430559e-07, "loss": 1.3139, "step": 6704 }, { "epoch": 0.9604641168886979, "grad_norm": 1.0267170667648315, "learning_rate": 8.188205100913337e-07, "loss": 1.3529, "step": 6705 }, { "epoch": 0.9606073628419998, "grad_norm": 1.1094212532043457, "learning_rate": 8.12905814693199e-07, "loss": 1.51, "step": 6706 }, { "epoch": 0.9607506087953015, "grad_norm": 1.110493540763855, "learning_rate": 8.07012471821944e-07, "loss": 1.4262, "step": 6707 }, { "epoch": 0.9608938547486033, "grad_norm": 1.1183924674987793, "learning_rate": 8.011404827462654e-07, "loss": 1.3114, "step": 6708 }, { "epoch": 0.9610371007019052, "grad_norm": 0.925645112991333, "learning_rate": 7.952898487302518e-07, "loss": 1.43, "step": 6709 }, { "epoch": 0.961180346655207, "grad_norm": 1.1463572978973389, "learning_rate": 7.894605710333847e-07, "loss": 1.4017, "step": 6710 }, { "epoch": 0.9613235926085089, "grad_norm": 1.06252920627594, "learning_rate": 7.836526509105824e-07, "loss": 1.4132, "step": 6711 }, { "epoch": 0.9614668385618106, "grad_norm": 0.9852263331413269, "learning_rate": 7.778660896121448e-07, "loss": 1.6288, "step": 6712 }, { "epoch": 0.9616100845151124, "grad_norm": 1.1114845275878906, "learning_rate": 7.721008883837755e-07, "loss": 1.3084, "step": 6713 }, { "epoch": 0.9617533304684143, "grad_norm": 1.4312448501586914, "learning_rate": 7.663570484665705e-07, "loss": 1.387, "step": 6714 }, { "epoch": 0.9618965764217161, "grad_norm": 1.0424330234527588, "learning_rate": 7.606345710970409e-07, "loss": 1.5029, "step": 6715 }, { "epoch": 0.962039822375018, "grad_norm": 1.2997428178787231, "learning_rate": 7.549334575070899e-07, "loss": 1.3587, "step": 6716 }, { "epoch": 0.9621830683283197, "grad_norm": 0.85928875207901, "learning_rate": 7.492537089240471e-07, "loss": 1.4165, "step": 6717 }, { "epoch": 0.9623263142816215, "grad_norm": 1.2010200023651123, "learning_rate": 7.435953265705897e-07, "loss": 1.5883, "step": 6718 }, { "epoch": 0.9624695602349234, "grad_norm": 1.2231497764587402, "learning_rate": 7.379583116648436e-07, "loss": 1.2676, "step": 6719 }, { "epoch": 0.9626128061882252, "grad_norm": 1.2439926862716675, "learning_rate": 7.32342665420338e-07, "loss": 1.4369, "step": 6720 }, { "epoch": 0.962756052141527, "grad_norm": 1.0502089262008667, "learning_rate": 7.267483890459503e-07, "loss": 1.2661, "step": 6721 }, { "epoch": 0.9628992980948288, "grad_norm": 1.2538540363311768, "learning_rate": 7.21175483745995e-07, "loss": 1.2551, "step": 6722 }, { "epoch": 0.9630425440481306, "grad_norm": 1.2815312147140503, "learning_rate": 7.156239507202011e-07, "loss": 1.3625, "step": 6723 }, { "epoch": 0.9631857900014325, "grad_norm": 0.9382988214492798, "learning_rate": 7.100937911636574e-07, "loss": 1.2515, "step": 6724 }, { "epoch": 0.9633290359547343, "grad_norm": 1.0944647789001465, "learning_rate": 7.04585006266878e-07, "loss": 1.529, "step": 6725 }, { "epoch": 0.9634722819080361, "grad_norm": 1.2149970531463623, "learning_rate": 6.990975972157699e-07, "loss": 1.31, "step": 6726 }, { "epoch": 0.963615527861338, "grad_norm": 0.974987268447876, "learning_rate": 6.936315651916325e-07, "loss": 1.4224, "step": 6727 }, { "epoch": 0.9637587738146397, "grad_norm": 1.0514013767242432, "learning_rate": 6.881869113711692e-07, "loss": 1.3358, "step": 6728 }, { "epoch": 0.9639020197679415, "grad_norm": 0.8235097527503967, "learning_rate": 6.827636369264645e-07, "loss": 1.6115, "step": 6729 }, { "epoch": 0.9640452657212434, "grad_norm": 1.0267082452774048, "learning_rate": 6.773617430250512e-07, "loss": 1.5101, "step": 6730 }, { "epoch": 0.9641885116745452, "grad_norm": 1.089717984199524, "learning_rate": 6.71981230829788e-07, "loss": 1.5461, "step": 6731 }, { "epoch": 0.9643317576278471, "grad_norm": 1.0367276668548584, "learning_rate": 6.666221014989815e-07, "loss": 1.5668, "step": 6732 }, { "epoch": 0.9644750035811488, "grad_norm": 1.1212315559387207, "learning_rate": 6.612843561863092e-07, "loss": 1.2067, "step": 6733 }, { "epoch": 0.9646182495344506, "grad_norm": 1.1231820583343506, "learning_rate": 6.559679960408738e-07, "loss": 1.4505, "step": 6734 }, { "epoch": 0.9647614954877525, "grad_norm": 1.0434908866882324, "learning_rate": 6.506730222071488e-07, "loss": 1.4701, "step": 6735 }, { "epoch": 0.9649047414410543, "grad_norm": 0.9652475714683533, "learning_rate": 6.453994358249893e-07, "loss": 1.4284, "step": 6736 }, { "epoch": 0.9650479873943562, "grad_norm": 0.9612234830856323, "learning_rate": 6.401472380297091e-07, "loss": 1.4568, "step": 6737 }, { "epoch": 0.9651912333476579, "grad_norm": 0.8843510150909424, "learning_rate": 6.349164299519483e-07, "loss": 1.4741, "step": 6738 }, { "epoch": 0.9653344793009597, "grad_norm": 1.0622419118881226, "learning_rate": 6.297070127177617e-07, "loss": 1.2744, "step": 6739 }, { "epoch": 0.9654777252542616, "grad_norm": 0.9046955108642578, "learning_rate": 6.245189874486301e-07, "loss": 1.3447, "step": 6740 }, { "epoch": 0.9656209712075634, "grad_norm": 1.0797584056854248, "learning_rate": 6.193523552614044e-07, "loss": 1.3195, "step": 6741 }, { "epoch": 0.9657642171608652, "grad_norm": 1.1649394035339355, "learning_rate": 6.142071172683284e-07, "loss": 1.3349, "step": 6742 }, { "epoch": 0.965907463114167, "grad_norm": 0.915144145488739, "learning_rate": 6.09083274577038e-07, "loss": 1.1392, "step": 6743 }, { "epoch": 0.9660507090674688, "grad_norm": 1.137065052986145, "learning_rate": 6.039808282905735e-07, "loss": 1.3218, "step": 6744 }, { "epoch": 0.9661939550207707, "grad_norm": 1.0155001878738403, "learning_rate": 5.98899779507367e-07, "loss": 1.6545, "step": 6745 }, { "epoch": 0.9663372009740725, "grad_norm": 1.5141016244888306, "learning_rate": 5.938401293212547e-07, "loss": 1.2473, "step": 6746 }, { "epoch": 0.9664804469273743, "grad_norm": 1.0211079120635986, "learning_rate": 5.888018788214322e-07, "loss": 1.4887, "step": 6747 }, { "epoch": 0.9666236928806761, "grad_norm": 0.946195125579834, "learning_rate": 5.837850290925206e-07, "loss": 1.3365, "step": 6748 }, { "epoch": 0.9667669388339779, "grad_norm": 1.2444173097610474, "learning_rate": 5.787895812145227e-07, "loss": 1.4039, "step": 6749 }, { "epoch": 0.9669101847872797, "grad_norm": 1.1345558166503906, "learning_rate": 5.738155362628339e-07, "loss": 1.5199, "step": 6750 }, { "epoch": 0.9670534307405816, "grad_norm": 1.2315428256988525, "learning_rate": 5.68862895308242e-07, "loss": 1.2671, "step": 6751 }, { "epoch": 0.9671966766938834, "grad_norm": 1.031372308731079, "learning_rate": 5.639316594169386e-07, "loss": 1.6054, "step": 6752 }, { "epoch": 0.9673399226471853, "grad_norm": 0.985232412815094, "learning_rate": 5.590218296504857e-07, "loss": 1.5288, "step": 6753 }, { "epoch": 0.967483168600487, "grad_norm": 1.05845308303833, "learning_rate": 5.541334070658488e-07, "loss": 1.5907, "step": 6754 }, { "epoch": 0.9676264145537888, "grad_norm": 1.0177547931671143, "learning_rate": 5.492663927153863e-07, "loss": 1.2761, "step": 6755 }, { "epoch": 0.9677696605070907, "grad_norm": 1.0305039882659912, "learning_rate": 5.444207876468488e-07, "loss": 1.4256, "step": 6756 }, { "epoch": 0.9679129064603925, "grad_norm": 1.1531683206558228, "learning_rate": 5.395965929033686e-07, "loss": 1.4734, "step": 6757 }, { "epoch": 0.9680561524136944, "grad_norm": 1.1651809215545654, "learning_rate": 5.347938095234705e-07, "loss": 1.4751, "step": 6758 }, { "epoch": 0.9681993983669961, "grad_norm": 1.2625993490219116, "learning_rate": 5.300124385410943e-07, "loss": 1.3226, "step": 6759 }, { "epoch": 0.9683426443202979, "grad_norm": 1.1251366138458252, "learning_rate": 5.252524809855386e-07, "loss": 1.423, "step": 6760 }, { "epoch": 0.9684858902735998, "grad_norm": 1.1552636623382568, "learning_rate": 5.20513937881506e-07, "loss": 1.4261, "step": 6761 }, { "epoch": 0.9686291362269016, "grad_norm": 0.8800671696662903, "learning_rate": 5.157968102490918e-07, "loss": 1.365, "step": 6762 }, { "epoch": 0.9687723821802035, "grad_norm": 0.9991808533668518, "learning_rate": 5.111010991037613e-07, "loss": 1.4096, "step": 6763 }, { "epoch": 0.9689156281335052, "grad_norm": 0.8865735530853271, "learning_rate": 5.064268054564059e-07, "loss": 1.4666, "step": 6764 }, { "epoch": 0.969058874086807, "grad_norm": 0.9733909964561462, "learning_rate": 5.017739303132763e-07, "loss": 1.3083, "step": 6765 }, { "epoch": 0.9692021200401089, "grad_norm": 1.055842638015747, "learning_rate": 4.971424746760156e-07, "loss": 1.3502, "step": 6766 }, { "epoch": 0.9693453659934107, "grad_norm": 1.0827281475067139, "learning_rate": 4.925324395416709e-07, "loss": 1.6086, "step": 6767 }, { "epoch": 0.9694886119467125, "grad_norm": 1.003227949142456, "learning_rate": 4.879438259026592e-07, "loss": 1.3716, "step": 6768 }, { "epoch": 0.9696318579000143, "grad_norm": 1.0578702688217163, "learning_rate": 4.833766347468016e-07, "loss": 1.4366, "step": 6769 }, { "epoch": 0.9697751038533161, "grad_norm": 0.9403130412101746, "learning_rate": 4.788308670573005e-07, "loss": 1.256, "step": 6770 }, { "epoch": 0.969918349806618, "grad_norm": 1.1767295598983765, "learning_rate": 4.743065238127509e-07, "loss": 1.3124, "step": 6771 }, { "epoch": 0.9700615957599198, "grad_norm": 1.0601083040237427, "learning_rate": 4.698036059871291e-07, "loss": 1.5397, "step": 6772 }, { "epoch": 0.9702048417132216, "grad_norm": 0.9455981254577637, "learning_rate": 4.6532211454979324e-07, "loss": 1.4451, "step": 6773 }, { "epoch": 0.9703480876665235, "grad_norm": 0.8330476880073547, "learning_rate": 4.608620504655048e-07, "loss": 1.4231, "step": 6774 }, { "epoch": 0.9704913336198252, "grad_norm": 1.0068690776824951, "learning_rate": 4.5642341469441794e-07, "loss": 1.3338, "step": 6775 }, { "epoch": 0.970634579573127, "grad_norm": 0.9365617632865906, "learning_rate": 4.520062081920351e-07, "loss": 1.4194, "step": 6776 }, { "epoch": 0.9707778255264289, "grad_norm": 0.9804052710533142, "learning_rate": 4.4761043190929553e-07, "loss": 1.4141, "step": 6777 }, { "epoch": 0.9709210714797307, "grad_norm": 1.0885045528411865, "learning_rate": 4.4323608679248676e-07, "loss": 1.4991, "step": 6778 }, { "epoch": 0.9710643174330326, "grad_norm": 1.026187777519226, "learning_rate": 4.388831737832999e-07, "loss": 1.4373, "step": 6779 }, { "epoch": 0.9712075633863343, "grad_norm": 1.1540892124176025, "learning_rate": 4.345516938188188e-07, "loss": 1.2668, "step": 6780 }, { "epoch": 0.9713508093396361, "grad_norm": 1.0256913900375366, "learning_rate": 4.3024164783148636e-07, "loss": 1.4697, "step": 6781 }, { "epoch": 0.971494055292938, "grad_norm": 1.1743240356445312, "learning_rate": 4.2595303674916044e-07, "loss": 1.451, "step": 6782 }, { "epoch": 0.9716373012462398, "grad_norm": 1.140163779258728, "learning_rate": 4.216858614950692e-07, "loss": 1.5215, "step": 6783 }, { "epoch": 0.9717805471995417, "grad_norm": 0.940036416053772, "learning_rate": 4.174401229878333e-07, "loss": 1.4223, "step": 6784 }, { "epoch": 0.9719237931528434, "grad_norm": 0.931448221206665, "learning_rate": 4.1321582214145506e-07, "loss": 1.5661, "step": 6785 }, { "epoch": 0.9720670391061452, "grad_norm": 1.0401630401611328, "learning_rate": 4.090129598653181e-07, "loss": 1.3189, "step": 6786 }, { "epoch": 0.9722102850594471, "grad_norm": 1.2921048402786255, "learning_rate": 4.048315370641986e-07, "loss": 1.4964, "step": 6787 }, { "epoch": 0.9723535310127489, "grad_norm": 1.021125078201294, "learning_rate": 4.006715546382434e-07, "loss": 1.432, "step": 6788 }, { "epoch": 0.9724967769660507, "grad_norm": 1.0780339241027832, "learning_rate": 3.9653301348301364e-07, "loss": 1.3225, "step": 6789 }, { "epoch": 0.9726400229193525, "grad_norm": 1.312149167060852, "learning_rate": 3.92415914489408e-07, "loss": 1.4005, "step": 6790 }, { "epoch": 0.9727832688726543, "grad_norm": 1.1159489154815674, "learning_rate": 3.8832025854376176e-07, "loss": 1.3355, "step": 6791 }, { "epoch": 0.9729265148259562, "grad_norm": 0.9776840209960938, "learning_rate": 3.842460465277586e-07, "loss": 1.4045, "step": 6792 }, { "epoch": 0.973069760779258, "grad_norm": 1.1597959995269775, "learning_rate": 3.801932793184748e-07, "loss": 1.2606, "step": 6793 }, { "epoch": 0.9732130067325598, "grad_norm": 1.2329657077789307, "learning_rate": 3.7616195778836793e-07, "loss": 1.2383, "step": 6794 }, { "epoch": 0.9733562526858617, "grad_norm": 1.1108812093734741, "learning_rate": 3.721520828052771e-07, "loss": 1.4166, "step": 6795 }, { "epoch": 0.9734994986391634, "grad_norm": 0.9824594259262085, "learning_rate": 3.681636552324452e-07, "loss": 1.3938, "step": 6796 }, { "epoch": 0.9736427445924652, "grad_norm": 0.9686800241470337, "learning_rate": 3.6419667592847427e-07, "loss": 1.3513, "step": 6797 }, { "epoch": 0.9737859905457671, "grad_norm": 1.0292178392410278, "learning_rate": 3.6025114574734785e-07, "loss": 1.2447, "step": 6798 }, { "epoch": 0.9739292364990689, "grad_norm": 0.9551668167114258, "learning_rate": 3.563270655384532e-07, "loss": 1.3317, "step": 6799 }, { "epoch": 0.9740724824523708, "grad_norm": 0.9626041054725647, "learning_rate": 3.5242443614654784e-07, "loss": 1.5247, "step": 6800 }, { "epoch": 0.9742157284056725, "grad_norm": 1.173126220703125, "learning_rate": 3.4854325841175985e-07, "loss": 1.414, "step": 6801 }, { "epoch": 0.9743589743589743, "grad_norm": 1.0089622735977173, "learning_rate": 3.446835331696208e-07, "loss": 1.3249, "step": 6802 }, { "epoch": 0.9745022203122762, "grad_norm": 1.021348476409912, "learning_rate": 3.4084526125103267e-07, "loss": 1.5224, "step": 6803 }, { "epoch": 0.974645466265578, "grad_norm": 0.8531768918037415, "learning_rate": 3.370284434822679e-07, "loss": 1.4358, "step": 6804 }, { "epoch": 0.9747887122188799, "grad_norm": 1.0392380952835083, "learning_rate": 3.332330806850137e-07, "loss": 1.5288, "step": 6805 }, { "epoch": 0.9749319581721816, "grad_norm": 1.064247965812683, "learning_rate": 3.294591736763164e-07, "loss": 1.3342, "step": 6806 }, { "epoch": 0.9750752041254834, "grad_norm": 1.1191128492355347, "learning_rate": 3.2570672326858175e-07, "loss": 1.4196, "step": 6807 }, { "epoch": 0.9752184500787853, "grad_norm": 1.041569471359253, "learning_rate": 3.219757302696302e-07, "loss": 1.4812, "step": 6808 }, { "epoch": 0.9753616960320871, "grad_norm": 1.1266881227493286, "learning_rate": 3.182661954826638e-07, "loss": 1.2498, "step": 6809 }, { "epoch": 0.975504941985389, "grad_norm": 1.055417776107788, "learning_rate": 3.1457811970624364e-07, "loss": 1.3558, "step": 6810 }, { "epoch": 0.9756481879386907, "grad_norm": 1.0740935802459717, "learning_rate": 3.1091150373433465e-07, "loss": 1.2596, "step": 6811 }, { "epoch": 0.9757914338919925, "grad_norm": 1.1849511861801147, "learning_rate": 3.072663483562388e-07, "loss": 1.2935, "step": 6812 }, { "epoch": 0.9759346798452944, "grad_norm": 1.1551735401153564, "learning_rate": 3.0364265435669503e-07, "loss": 1.3887, "step": 6813 }, { "epoch": 0.9760779257985962, "grad_norm": 1.2335840463638306, "learning_rate": 3.0004042251579047e-07, "loss": 1.4372, "step": 6814 }, { "epoch": 0.976221171751898, "grad_norm": 1.142035961151123, "learning_rate": 2.9645965360898255e-07, "loss": 1.3081, "step": 6815 }, { "epoch": 0.9763644177051999, "grad_norm": 0.942271888256073, "learning_rate": 2.9290034840713245e-07, "loss": 1.4419, "step": 6816 }, { "epoch": 0.9765076636585016, "grad_norm": 0.9963545203208923, "learning_rate": 2.8936250767647167e-07, "loss": 1.4326, "step": 6817 }, { "epoch": 0.9766509096118035, "grad_norm": 1.1144119501113892, "learning_rate": 2.8584613217861324e-07, "loss": 1.3214, "step": 6818 }, { "epoch": 0.9767941555651053, "grad_norm": 0.9936862587928772, "learning_rate": 2.8235122267052936e-07, "loss": 1.3324, "step": 6819 }, { "epoch": 0.9769374015184071, "grad_norm": 1.0080571174621582, "learning_rate": 2.7887777990460716e-07, "loss": 1.343, "step": 6820 }, { "epoch": 0.977080647471709, "grad_norm": 1.0317273139953613, "learning_rate": 2.754258046285818e-07, "loss": 1.3758, "step": 6821 }, { "epoch": 0.9772238934250107, "grad_norm": 1.3107060194015503, "learning_rate": 2.719952975855811e-07, "loss": 1.3296, "step": 6822 }, { "epoch": 0.9773671393783125, "grad_norm": 1.0547453165054321, "learning_rate": 2.685862595141142e-07, "loss": 1.4308, "step": 6823 }, { "epoch": 0.9775103853316144, "grad_norm": 1.2521612644195557, "learning_rate": 2.6519869114804975e-07, "loss": 1.4205, "step": 6824 }, { "epoch": 0.9776536312849162, "grad_norm": 1.0133212804794312, "learning_rate": 2.618325932166488e-07, "loss": 1.3525, "step": 6825 }, { "epoch": 0.9777968772382181, "grad_norm": 1.0904204845428467, "learning_rate": 2.58487966444565e-07, "loss": 1.4972, "step": 6826 }, { "epoch": 0.9779401231915198, "grad_norm": 1.1512699127197266, "learning_rate": 2.5516481155180014e-07, "loss": 1.3731, "step": 6827 }, { "epoch": 0.9780833691448216, "grad_norm": 1.0409411191940308, "learning_rate": 2.518631292537488e-07, "loss": 1.4045, "step": 6828 }, { "epoch": 0.9782266150981235, "grad_norm": 1.3136940002441406, "learning_rate": 2.485829202611756e-07, "loss": 1.3718, "step": 6829 }, { "epoch": 0.9783698610514253, "grad_norm": 1.012670874595642, "learning_rate": 2.453241852802379e-07, "loss": 1.4019, "step": 6830 }, { "epoch": 0.9785131070047272, "grad_norm": 1.157504916191101, "learning_rate": 2.4208692501246354e-07, "loss": 1.2291, "step": 6831 }, { "epoch": 0.9786563529580289, "grad_norm": 0.9912055730819702, "learning_rate": 2.3887114015475056e-07, "loss": 1.3806, "step": 6832 }, { "epoch": 0.9787995989113307, "grad_norm": 1.1572833061218262, "learning_rate": 2.3567683139936735e-07, "loss": 1.6286, "step": 6833 }, { "epoch": 0.9789428448646326, "grad_norm": 1.0619378089904785, "learning_rate": 2.3250399943398614e-07, "loss": 1.3827, "step": 6834 }, { "epoch": 0.9790860908179344, "grad_norm": 1.1415191888809204, "learning_rate": 2.2935264494162724e-07, "loss": 1.4628, "step": 6835 }, { "epoch": 0.9792293367712362, "grad_norm": 1.050657868385315, "learning_rate": 2.2622276860070346e-07, "loss": 1.4915, "step": 6836 }, { "epoch": 0.979372582724538, "grad_norm": 1.2451061010360718, "learning_rate": 2.231143710849981e-07, "loss": 1.4248, "step": 6837 }, { "epoch": 0.9795158286778398, "grad_norm": 0.9803173542022705, "learning_rate": 2.20027453063687e-07, "loss": 1.4441, "step": 6838 }, { "epoch": 0.9796590746311417, "grad_norm": 1.0542032718658447, "learning_rate": 2.1696201520128302e-07, "loss": 1.3416, "step": 6839 }, { "epoch": 0.9798023205844435, "grad_norm": 1.1661409139633179, "learning_rate": 2.1391805815771382e-07, "loss": 1.2723, "step": 6840 }, { "epoch": 0.9799455665377453, "grad_norm": 1.1447373628616333, "learning_rate": 2.1089558258826637e-07, "loss": 1.3319, "step": 6841 }, { "epoch": 0.9800888124910472, "grad_norm": 0.9953629374504089, "learning_rate": 2.0789458914359793e-07, "loss": 1.3527, "step": 6842 }, { "epoch": 0.9802320584443489, "grad_norm": 1.0353646278381348, "learning_rate": 2.0491507846975843e-07, "loss": 1.4133, "step": 6843 }, { "epoch": 0.9803753043976507, "grad_norm": 1.1344181299209595, "learning_rate": 2.019570512081459e-07, "loss": 1.5246, "step": 6844 }, { "epoch": 0.9805185503509526, "grad_norm": 1.0790821313858032, "learning_rate": 1.9902050799557315e-07, "loss": 1.3916, "step": 6845 }, { "epoch": 0.9806617963042544, "grad_norm": 0.9657145738601685, "learning_rate": 1.9610544946420117e-07, "loss": 1.3994, "step": 6846 }, { "epoch": 0.9808050422575563, "grad_norm": 1.096034049987793, "learning_rate": 1.9321187624155024e-07, "loss": 1.4486, "step": 6847 }, { "epoch": 0.980948288210858, "grad_norm": 1.2756056785583496, "learning_rate": 1.9033978895054429e-07, "loss": 1.3731, "step": 6848 }, { "epoch": 0.9810915341641598, "grad_norm": 1.0752605199813843, "learning_rate": 1.8748918820948868e-07, "loss": 1.3842, "step": 6849 }, { "epoch": 0.9812347801174617, "grad_norm": 1.198684811592102, "learning_rate": 1.8466007463202596e-07, "loss": 1.4298, "step": 6850 }, { "epoch": 0.9813780260707635, "grad_norm": 1.0399978160858154, "learning_rate": 1.8185244882721332e-07, "loss": 1.5014, "step": 6851 }, { "epoch": 0.9815212720240654, "grad_norm": 1.0486814975738525, "learning_rate": 1.7906631139944508e-07, "loss": 1.2973, "step": 6852 }, { "epoch": 0.9816645179773671, "grad_norm": 1.1483771800994873, "learning_rate": 1.7630166294850813e-07, "loss": 1.3234, "step": 6853 }, { "epoch": 0.9818077639306689, "grad_norm": 1.4123873710632324, "learning_rate": 1.7355850406958196e-07, "loss": 1.3593, "step": 6854 }, { "epoch": 0.9819510098839708, "grad_norm": 1.0696710348129272, "learning_rate": 1.7083683535318306e-07, "loss": 1.3982, "step": 6855 }, { "epoch": 0.9820942558372726, "grad_norm": 1.224077582359314, "learning_rate": 1.6813665738523166e-07, "loss": 1.5042, "step": 6856 }, { "epoch": 0.9822375017905745, "grad_norm": 1.0116801261901855, "learning_rate": 1.654579707469961e-07, "loss": 1.297, "step": 6857 }, { "epoch": 0.9823807477438762, "grad_norm": 1.322712779045105, "learning_rate": 1.6280077601513734e-07, "loss": 1.2341, "step": 6858 }, { "epoch": 0.982523993697178, "grad_norm": 1.1745147705078125, "learning_rate": 1.6016507376169777e-07, "loss": 1.2589, "step": 6859 }, { "epoch": 0.9826672396504799, "grad_norm": 1.1023924350738525, "learning_rate": 1.5755086455404577e-07, "loss": 1.3073, "step": 6860 }, { "epoch": 0.9828104856037817, "grad_norm": 0.9869914650917053, "learning_rate": 1.5495814895498673e-07, "loss": 1.3961, "step": 6861 }, { "epoch": 0.9829537315570835, "grad_norm": 1.1217172145843506, "learning_rate": 1.5238692752266303e-07, "loss": 1.4043, "step": 6862 }, { "epoch": 0.9830969775103854, "grad_norm": 0.9376956820487976, "learning_rate": 1.498372008105764e-07, "loss": 1.3973, "step": 6863 }, { "epoch": 0.9832402234636871, "grad_norm": 1.1462827920913696, "learning_rate": 1.4730896936764327e-07, "loss": 1.2721, "step": 6864 }, { "epoch": 0.983383469416989, "grad_norm": 1.0549758672714233, "learning_rate": 1.448022337381061e-07, "loss": 1.4016, "step": 6865 }, { "epoch": 0.9835267153702908, "grad_norm": 1.310039758682251, "learning_rate": 1.4231699446162205e-07, "loss": 1.3705, "step": 6866 }, { "epoch": 0.9836699613235926, "grad_norm": 1.0545952320098877, "learning_rate": 1.3985325207319655e-07, "loss": 1.5126, "step": 6867 }, { "epoch": 0.9838132072768945, "grad_norm": 0.9783623218536377, "learning_rate": 1.3741100710321643e-07, "loss": 1.2753, "step": 6868 }, { "epoch": 0.9839564532301962, "grad_norm": 0.9266145825386047, "learning_rate": 1.3499026007741665e-07, "loss": 1.3952, "step": 6869 }, { "epoch": 0.984099699183498, "grad_norm": 0.94971764087677, "learning_rate": 1.3259101151694708e-07, "loss": 1.3221, "step": 6870 }, { "epoch": 0.9842429451367999, "grad_norm": 1.1135625839233398, "learning_rate": 1.3021326193830564e-07, "loss": 1.3664, "step": 6871 }, { "epoch": 0.9843861910901017, "grad_norm": 1.0000101327896118, "learning_rate": 1.2785701185333844e-07, "loss": 1.4034, "step": 6872 }, { "epoch": 0.9845294370434036, "grad_norm": 0.9760690927505493, "learning_rate": 1.2552226176931746e-07, "loss": 1.2539, "step": 6873 }, { "epoch": 0.9846726829967053, "grad_norm": 0.8988303542137146, "learning_rate": 1.2320901218884072e-07, "loss": 1.3908, "step": 6874 }, { "epoch": 0.9848159289500071, "grad_norm": 1.4073565006256104, "learning_rate": 1.2091726360989874e-07, "loss": 1.2897, "step": 6875 }, { "epoch": 0.984959174903309, "grad_norm": 1.0398823022842407, "learning_rate": 1.1864701652584132e-07, "loss": 1.657, "step": 6876 }, { "epoch": 0.9851024208566108, "grad_norm": 1.1171183586120605, "learning_rate": 1.1639827142539972e-07, "loss": 1.38, "step": 6877 }, { "epoch": 0.9852456668099127, "grad_norm": 1.0871721506118774, "learning_rate": 1.1417102879268671e-07, "loss": 1.4262, "step": 6878 }, { "epoch": 0.9853889127632144, "grad_norm": 1.1369484663009644, "learning_rate": 1.1196528910715209e-07, "loss": 1.3316, "step": 6879 }, { "epoch": 0.9855321587165162, "grad_norm": 0.960265576839447, "learning_rate": 1.0978105284363826e-07, "loss": 1.4356, "step": 6880 }, { "epoch": 0.9856754046698181, "grad_norm": 1.1848841905593872, "learning_rate": 1.0761832047238019e-07, "loss": 1.5113, "step": 6881 }, { "epoch": 0.9858186506231199, "grad_norm": 1.0930269956588745, "learning_rate": 1.0547709245893877e-07, "loss": 1.2834, "step": 6882 }, { "epoch": 0.9859618965764217, "grad_norm": 1.0428828001022339, "learning_rate": 1.0335736926426754e-07, "loss": 1.39, "step": 6883 }, { "epoch": 0.9861051425297236, "grad_norm": 1.0184683799743652, "learning_rate": 1.0125915134470143e-07, "loss": 1.4095, "step": 6884 }, { "epoch": 0.9862483884830253, "grad_norm": 0.9889693260192871, "learning_rate": 9.918243915193471e-08, "loss": 1.265, "step": 6885 }, { "epoch": 0.9863916344363272, "grad_norm": 1.1571595668792725, "learning_rate": 9.712723313302085e-08, "loss": 1.2631, "step": 6886 }, { "epoch": 0.986534880389629, "grad_norm": 1.2174410820007324, "learning_rate": 9.509353373040596e-08, "loss": 1.3352, "step": 6887 }, { "epoch": 0.9866781263429308, "grad_norm": 1.156304955482483, "learning_rate": 9.308134138188429e-08, "loss": 1.585, "step": 6888 }, { "epoch": 0.9868213722962327, "grad_norm": 1.062891960144043, "learning_rate": 9.109065652064263e-08, "loss": 1.4425, "step": 6889 }, { "epoch": 0.9869646182495344, "grad_norm": 1.059097409248352, "learning_rate": 8.9121479575216e-08, "loss": 1.4094, "step": 6890 }, { "epoch": 0.9871078642028362, "grad_norm": 0.9478225708007812, "learning_rate": 8.717381096953192e-08, "loss": 1.5065, "step": 6891 }, { "epoch": 0.9872511101561381, "grad_norm": 1.0617536306381226, "learning_rate": 8.524765112286614e-08, "loss": 1.3645, "step": 6892 }, { "epoch": 0.9873943561094399, "grad_norm": 1.1142922639846802, "learning_rate": 8.334300044987587e-08, "loss": 1.4638, "step": 6893 }, { "epoch": 0.9875376020627418, "grad_norm": 1.1490753889083862, "learning_rate": 8.145985936057754e-08, "loss": 1.344, "step": 6894 }, { "epoch": 0.9876808480160435, "grad_norm": 1.0488166809082031, "learning_rate": 7.959822826038022e-08, "loss": 1.4072, "step": 6895 }, { "epoch": 0.9878240939693453, "grad_norm": 1.1462736129760742, "learning_rate": 7.775810755003e-08, "loss": 1.4117, "step": 6896 }, { "epoch": 0.9879673399226472, "grad_norm": 1.0138330459594727, "learning_rate": 7.593949762567664e-08, "loss": 1.4115, "step": 6897 }, { "epoch": 0.988110585875949, "grad_norm": 1.1845062971115112, "learning_rate": 7.414239887880702e-08, "loss": 1.2574, "step": 6898 }, { "epoch": 0.9882538318292509, "grad_norm": 0.9185782670974731, "learning_rate": 7.236681169628945e-08, "loss": 1.4322, "step": 6899 }, { "epoch": 0.9883970777825526, "grad_norm": 1.1274644136428833, "learning_rate": 7.06127364603848e-08, "loss": 1.2809, "step": 6900 }, { "epoch": 0.9885403237358544, "grad_norm": 0.9798005819320679, "learning_rate": 6.888017354869103e-08, "loss": 1.3656, "step": 6901 }, { "epoch": 0.9886835696891563, "grad_norm": 0.8769481778144836, "learning_rate": 6.716912333417646e-08, "loss": 1.5398, "step": 6902 }, { "epoch": 0.9888268156424581, "grad_norm": 0.9875479340553284, "learning_rate": 6.5479586185202e-08, "loss": 1.323, "step": 6903 }, { "epoch": 0.98897006159576, "grad_norm": 1.095491886138916, "learning_rate": 6.38115624654656e-08, "loss": 1.3066, "step": 6904 }, { "epoch": 0.9891133075490618, "grad_norm": 1.2760183811187744, "learning_rate": 6.216505253408e-08, "loss": 1.4963, "step": 6905 }, { "epoch": 0.9892565535023635, "grad_norm": 1.1250486373901367, "learning_rate": 6.05400567454728e-08, "loss": 1.4826, "step": 6906 }, { "epoch": 0.9893997994556654, "grad_norm": 1.08491051197052, "learning_rate": 5.893657544947528e-08, "loss": 1.1679, "step": 6907 }, { "epoch": 0.9895430454089672, "grad_norm": 1.0493440628051758, "learning_rate": 5.7354608991266876e-08, "loss": 1.3012, "step": 6908 }, { "epoch": 0.989686291362269, "grad_norm": 1.173628330230713, "learning_rate": 5.5794157711430705e-08, "loss": 1.3258, "step": 6909 }, { "epoch": 0.9898295373155709, "grad_norm": 0.9683168530464172, "learning_rate": 5.4255221945864744e-08, "loss": 1.475, "step": 6910 }, { "epoch": 0.9899727832688726, "grad_norm": 0.9912375211715698, "learning_rate": 5.273780202588174e-08, "loss": 1.4734, "step": 6911 }, { "epoch": 0.9901160292221745, "grad_norm": 1.07449471950531, "learning_rate": 5.124189827813153e-08, "loss": 1.3338, "step": 6912 }, { "epoch": 0.9902592751754763, "grad_norm": 1.2173659801483154, "learning_rate": 4.9767511024656486e-08, "loss": 1.3669, "step": 6913 }, { "epoch": 0.9904025211287781, "grad_norm": 1.1356706619262695, "learning_rate": 4.8314640582858284e-08, "loss": 1.3088, "step": 6914 }, { "epoch": 0.99054576708208, "grad_norm": 0.9836452603340149, "learning_rate": 4.6883287265497844e-08, "loss": 1.4552, "step": 6915 }, { "epoch": 0.9906890130353817, "grad_norm": 1.0289698839187622, "learning_rate": 4.5473451380706463e-08, "loss": 1.3499, "step": 6916 }, { "epoch": 0.9908322589886835, "grad_norm": 1.0422101020812988, "learning_rate": 4.408513323198582e-08, "loss": 1.3797, "step": 6917 }, { "epoch": 0.9909755049419854, "grad_norm": 1.0672037601470947, "learning_rate": 4.271833311821905e-08, "loss": 1.4964, "step": 6918 }, { "epoch": 0.9911187508952872, "grad_norm": 1.1386151313781738, "learning_rate": 4.137305133362634e-08, "loss": 1.3855, "step": 6919 }, { "epoch": 0.9912619968485891, "grad_norm": 1.0069860219955444, "learning_rate": 4.0049288167842705e-08, "loss": 1.45, "step": 6920 }, { "epoch": 0.9914052428018908, "grad_norm": 1.1581194400787354, "learning_rate": 3.8747043905806856e-08, "loss": 1.3311, "step": 6921 }, { "epoch": 0.9915484887551926, "grad_norm": 1.049757480621338, "learning_rate": 3.746631882787233e-08, "loss": 1.2218, "step": 6922 }, { "epoch": 0.9916917347084945, "grad_norm": 1.0640041828155518, "learning_rate": 3.6207113209763e-08, "loss": 1.4512, "step": 6923 }, { "epoch": 0.9918349806617963, "grad_norm": 1.1585804224014282, "learning_rate": 3.496942732253983e-08, "loss": 1.3157, "step": 6924 }, { "epoch": 0.9919782266150982, "grad_norm": 1.1739224195480347, "learning_rate": 3.375326143264523e-08, "loss": 1.2783, "step": 6925 }, { "epoch": 0.9921214725684, "grad_norm": 1.1030296087265015, "learning_rate": 3.2558615801892014e-08, "loss": 1.3843, "step": 6926 }, { "epoch": 0.9922647185217017, "grad_norm": 1.0498274564743042, "learning_rate": 3.138549068745222e-08, "loss": 1.4051, "step": 6927 }, { "epoch": 0.9924079644750036, "grad_norm": 1.021190881729126, "learning_rate": 3.0233886341890504e-08, "loss": 1.4729, "step": 6928 }, { "epoch": 0.9925512104283054, "grad_norm": 1.3140523433685303, "learning_rate": 2.9103803013097453e-08, "loss": 1.1676, "step": 6929 }, { "epoch": 0.9926944563816072, "grad_norm": 1.0635639429092407, "learning_rate": 2.799524094436734e-08, "loss": 1.3982, "step": 6930 }, { "epoch": 0.9928377023349091, "grad_norm": 1.047019600868225, "learning_rate": 2.6908200374331503e-08, "loss": 1.3862, "step": 6931 }, { "epoch": 0.9929809482882108, "grad_norm": 1.0426539182662964, "learning_rate": 2.584268153701386e-08, "loss": 1.3052, "step": 6932 }, { "epoch": 0.9931241942415127, "grad_norm": 1.2257951498031616, "learning_rate": 2.4798684661786476e-08, "loss": 1.5279, "step": 6933 }, { "epoch": 0.9932674401948145, "grad_norm": 1.1203614473342896, "learning_rate": 2.377620997340291e-08, "loss": 1.3, "step": 6934 }, { "epoch": 0.9934106861481163, "grad_norm": 1.0221182107925415, "learning_rate": 2.2775257691975972e-08, "loss": 1.3475, "step": 6935 }, { "epoch": 0.9935539321014182, "grad_norm": 1.2252392768859863, "learning_rate": 2.179582803297775e-08, "loss": 1.5167, "step": 6936 }, { "epoch": 0.9936971780547199, "grad_norm": 1.054181456565857, "learning_rate": 2.0837921207272902e-08, "loss": 1.2551, "step": 6937 }, { "epoch": 0.9938404240080217, "grad_norm": 0.995672881603241, "learning_rate": 1.990153742105205e-08, "loss": 1.3657, "step": 6938 }, { "epoch": 0.9939836699613236, "grad_norm": 1.1049494743347168, "learning_rate": 1.8986676875909494e-08, "loss": 1.5315, "step": 6939 }, { "epoch": 0.9941269159146254, "grad_norm": 1.1547455787658691, "learning_rate": 1.8093339768798788e-08, "loss": 1.5175, "step": 6940 }, { "epoch": 0.9942701618679273, "grad_norm": 1.09352707862854, "learning_rate": 1.722152629201057e-08, "loss": 1.3678, "step": 6941 }, { "epoch": 0.994413407821229, "grad_norm": 1.1882737874984741, "learning_rate": 1.637123663323914e-08, "loss": 1.3243, "step": 6942 }, { "epoch": 0.9945566537745308, "grad_norm": 1.1435630321502686, "learning_rate": 1.554247097553807e-08, "loss": 1.469, "step": 6943 }, { "epoch": 0.9946998997278327, "grad_norm": 1.0475714206695557, "learning_rate": 1.47352294973091e-08, "loss": 1.4755, "step": 6944 }, { "epoch": 0.9948431456811345, "grad_norm": 1.0620249509811401, "learning_rate": 1.3949512372335438e-08, "loss": 1.6501, "step": 6945 }, { "epoch": 0.9949863916344364, "grad_norm": 1.0384702682495117, "learning_rate": 1.3185319769759564e-08, "loss": 1.334, "step": 6946 }, { "epoch": 0.9951296375877381, "grad_norm": 1.0790804624557495, "learning_rate": 1.2442651854094322e-08, "loss": 1.3465, "step": 6947 }, { "epoch": 0.9952728835410399, "grad_norm": 1.114069938659668, "learning_rate": 1.1721508785211832e-08, "loss": 1.4863, "step": 6948 }, { "epoch": 0.9954161294943418, "grad_norm": 1.0187630653381348, "learning_rate": 1.1021890718376781e-08, "loss": 1.3024, "step": 6949 }, { "epoch": 0.9955593754476436, "grad_norm": 1.1869088411331177, "learning_rate": 1.034379780416872e-08, "loss": 1.4357, "step": 6950 }, { "epoch": 0.9957026214009455, "grad_norm": 0.9803943634033203, "learning_rate": 9.687230188593077e-09, "loss": 1.3271, "step": 6951 }, { "epoch": 0.9958458673542473, "grad_norm": 1.0960941314697266, "learning_rate": 9.052188012981244e-09, "loss": 1.3742, "step": 6952 }, { "epoch": 0.995989113307549, "grad_norm": 1.0931395292282104, "learning_rate": 8.438671414034982e-09, "loss": 1.4412, "step": 6953 }, { "epoch": 0.9961323592608509, "grad_norm": 1.0480124950408936, "learning_rate": 7.846680523837524e-09, "loss": 1.219, "step": 6954 }, { "epoch": 0.9962756052141527, "grad_norm": 1.047921061515808, "learning_rate": 7.276215469831371e-09, "loss": 1.39, "step": 6955 }, { "epoch": 0.9964188511674545, "grad_norm": 1.0247312784194946, "learning_rate": 6.727276374818292e-09, "loss": 1.4031, "step": 6956 }, { "epoch": 0.9965620971207564, "grad_norm": 1.0511873960494995, "learning_rate": 6.1998633569704256e-09, "loss": 1.3113, "step": 6957 }, { "epoch": 0.9967053430740581, "grad_norm": 1.1090106964111328, "learning_rate": 5.693976529841383e-09, "loss": 1.3447, "step": 6958 }, { "epoch": 0.99684858902736, "grad_norm": 1.1817384958267212, "learning_rate": 5.209616002310736e-09, "loss": 1.3977, "step": 6959 }, { "epoch": 0.9969918349806618, "grad_norm": 1.1059447526931763, "learning_rate": 4.746781878672835e-09, "loss": 1.4947, "step": 6960 }, { "epoch": 0.9971350809339636, "grad_norm": 1.1532925367355347, "learning_rate": 4.305474258547993e-09, "loss": 1.5286, "step": 6961 }, { "epoch": 0.9972783268872655, "grad_norm": 1.181623935699463, "learning_rate": 3.885693236949095e-09, "loss": 1.3611, "step": 6962 }, { "epoch": 0.9974215728405672, "grad_norm": 1.2167772054672241, "learning_rate": 3.487438904237195e-09, "loss": 1.5892, "step": 6963 }, { "epoch": 0.997564818793869, "grad_norm": 1.1761642694473267, "learning_rate": 3.1107113461437133e-09, "loss": 1.421, "step": 6964 }, { "epoch": 0.9977080647471709, "grad_norm": 1.1366292238235474, "learning_rate": 2.755510643792647e-09, "loss": 1.4673, "step": 6965 }, { "epoch": 0.9978513107004727, "grad_norm": 1.089664340019226, "learning_rate": 2.4218368736228512e-09, "loss": 1.3498, "step": 6966 }, { "epoch": 0.9979945566537746, "grad_norm": 0.9447208046913147, "learning_rate": 2.109690107465756e-09, "loss": 1.3743, "step": 6967 }, { "epoch": 0.9981378026070763, "grad_norm": 1.1080571413040161, "learning_rate": 1.819070412545365e-09, "loss": 1.419, "step": 6968 }, { "epoch": 0.9982810485603781, "grad_norm": 1.1412129402160645, "learning_rate": 1.5499778514005415e-09, "loss": 1.4905, "step": 6969 }, { "epoch": 0.99842429451368, "grad_norm": 1.0053709745407104, "learning_rate": 1.3024124819738247e-09, "loss": 1.3341, "step": 6970 }, { "epoch": 0.9985675404669818, "grad_norm": 1.057105541229248, "learning_rate": 1.0763743575448182e-09, "loss": 1.3919, "step": 6971 }, { "epoch": 0.9987107864202837, "grad_norm": 1.2198081016540527, "learning_rate": 8.718635267856989e-10, "loss": 1.2662, "step": 6972 }, { "epoch": 0.9988540323735855, "grad_norm": 0.923511803150177, "learning_rate": 6.888800337279122e-10, "loss": 1.5828, "step": 6973 }, { "epoch": 0.9989972783268872, "grad_norm": 1.0067452192306519, "learning_rate": 5.274239177510687e-10, "loss": 1.4352, "step": 6974 }, { "epoch": 0.9991405242801891, "grad_norm": 1.035966396331787, "learning_rate": 3.874952136162513e-10, "loss": 1.3927, "step": 6975 }, { "epoch": 0.9992837702334909, "grad_norm": 1.0147660970687866, "learning_rate": 2.690939514438107e-10, "loss": 1.3475, "step": 6976 }, { "epoch": 0.9994270161867927, "grad_norm": 1.023337483406067, "learning_rate": 1.7222015673556968e-10, "loss": 1.4416, "step": 6977 }, { "epoch": 0.9995702621400946, "grad_norm": 1.0672696828842163, "learning_rate": 9.687385033041451e-11, "loss": 1.459, "step": 6978 }, { "epoch": 0.9997135080933963, "grad_norm": 1.103571891784668, "learning_rate": 4.305504844870356e-11, "loss": 1.2565, "step": 6979 }, { "epoch": 0.9998567540466982, "grad_norm": 1.0865168571472168, "learning_rate": 1.0763762692267421e-11, "loss": 1.4758, "step": 6980 }, { "epoch": 1.0, "grad_norm": 1.2903202772140503, "learning_rate": 0.0, "loss": 1.432, "step": 6981 }, { "epoch": 1.0, "step": 6981, "total_flos": 2.4986378761037414e+17, "train_loss": 1.5009833613741177, "train_runtime": 12243.7828, "train_samples_per_second": 18.245, "train_steps_per_second": 0.57 } ], "logging_steps": 1.0, "max_steps": 6981, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.4986378761037414e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }