{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 0, "global_step": 1042, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0009596928982725527, "grad_norm": 0.068359375, "learning_rate": 9.990403071017275e-06, "loss": 1.3835, "step": 1 }, { "epoch": 0.0019193857965451055, "grad_norm": 0.0673828125, "learning_rate": 9.98080614203455e-06, "loss": 1.3302, "step": 2 }, { "epoch": 0.0028790786948176585, "grad_norm": 0.0703125, "learning_rate": 9.971209213051824e-06, "loss": 1.3785, "step": 3 }, { "epoch": 0.003838771593090211, "grad_norm": 0.07080078125, "learning_rate": 9.961612284069098e-06, "loss": 1.3093, "step": 4 }, { "epoch": 0.0047984644913627635, "grad_norm": 0.07373046875, "learning_rate": 9.952015355086372e-06, "loss": 1.4606, "step": 5 }, { "epoch": 0.005758157389635317, "grad_norm": 0.07958984375, "learning_rate": 9.942418426103647e-06, "loss": 1.4186, "step": 6 }, { "epoch": 0.0067178502879078695, "grad_norm": 0.08544921875, "learning_rate": 9.932821497120923e-06, "loss": 1.4227, "step": 7 }, { "epoch": 0.007677543186180422, "grad_norm": 0.07763671875, "learning_rate": 9.923224568138197e-06, "loss": 1.2789, "step": 8 }, { "epoch": 0.008637236084452975, "grad_norm": 0.0888671875, "learning_rate": 9.913627639155471e-06, "loss": 1.4264, "step": 9 }, { "epoch": 0.009596928982725527, "grad_norm": 0.08544921875, "learning_rate": 9.904030710172746e-06, "loss": 1.3867, "step": 10 }, { "epoch": 0.01055662188099808, "grad_norm": 0.08837890625, "learning_rate": 9.89443378119002e-06, "loss": 1.3162, "step": 11 }, { "epoch": 0.011516314779270634, "grad_norm": 0.091796875, "learning_rate": 9.884836852207294e-06, "loss": 1.351, "step": 12 }, { "epoch": 0.012476007677543186, "grad_norm": 0.09765625, "learning_rate": 9.875239923224569e-06, "loss": 1.3704, "step": 13 }, { "epoch": 0.013435700575815739, "grad_norm": 0.0986328125, "learning_rate": 9.865642994241843e-06, "loss": 1.3264, "step": 14 }, { "epoch": 0.014395393474088292, "grad_norm": 0.1025390625, "learning_rate": 9.856046065259119e-06, "loss": 1.3761, "step": 15 }, { "epoch": 0.015355086372360844, "grad_norm": 0.10546875, "learning_rate": 9.846449136276392e-06, "loss": 1.3366, "step": 16 }, { "epoch": 0.016314779270633396, "grad_norm": 0.10009765625, "learning_rate": 9.836852207293666e-06, "loss": 1.2195, "step": 17 }, { "epoch": 0.01727447216890595, "grad_norm": 0.10888671875, "learning_rate": 9.82725527831094e-06, "loss": 1.3383, "step": 18 }, { "epoch": 0.018234165067178502, "grad_norm": 0.1142578125, "learning_rate": 9.817658349328216e-06, "loss": 1.4293, "step": 19 }, { "epoch": 0.019193857965451054, "grad_norm": 0.12158203125, "learning_rate": 9.80806142034549e-06, "loss": 1.3755, "step": 20 }, { "epoch": 0.02015355086372361, "grad_norm": 0.119140625, "learning_rate": 9.798464491362765e-06, "loss": 1.4084, "step": 21 }, { "epoch": 0.02111324376199616, "grad_norm": 0.1259765625, "learning_rate": 9.78886756238004e-06, "loss": 1.2938, "step": 22 }, { "epoch": 0.022072936660268713, "grad_norm": 0.1328125, "learning_rate": 9.779270633397314e-06, "loss": 1.3799, "step": 23 }, { "epoch": 0.023032629558541268, "grad_norm": 0.1328125, "learning_rate": 9.769673704414588e-06, "loss": 1.3184, "step": 24 }, { "epoch": 0.02399232245681382, "grad_norm": 0.13671875, "learning_rate": 9.760076775431862e-06, "loss": 1.367, "step": 25 }, { "epoch": 0.02495201535508637, "grad_norm": 0.1884765625, "learning_rate": 9.750479846449137e-06, "loss": 1.4573, "step": 26 }, { "epoch": 0.025911708253358926, "grad_norm": 0.169921875, "learning_rate": 9.740882917466411e-06, "loss": 1.5804, "step": 27 }, { "epoch": 0.026871401151631478, "grad_norm": 0.1669921875, "learning_rate": 9.731285988483687e-06, "loss": 1.4974, "step": 28 }, { "epoch": 0.02783109404990403, "grad_norm": 0.1455078125, "learning_rate": 9.721689059500961e-06, "loss": 1.4201, "step": 29 }, { "epoch": 0.028790786948176585, "grad_norm": 0.15625, "learning_rate": 9.712092130518234e-06, "loss": 1.3526, "step": 30 }, { "epoch": 0.029750479846449136, "grad_norm": 0.15234375, "learning_rate": 9.702495201535508e-06, "loss": 1.3501, "step": 31 }, { "epoch": 0.030710172744721688, "grad_norm": 0.1591796875, "learning_rate": 9.692898272552784e-06, "loss": 1.3327, "step": 32 }, { "epoch": 0.03166986564299424, "grad_norm": 0.162109375, "learning_rate": 9.683301343570059e-06, "loss": 1.3058, "step": 33 }, { "epoch": 0.03262955854126679, "grad_norm": 0.1669921875, "learning_rate": 9.673704414587333e-06, "loss": 1.3515, "step": 34 }, { "epoch": 0.03358925143953935, "grad_norm": 0.177734375, "learning_rate": 9.664107485604607e-06, "loss": 1.4229, "step": 35 }, { "epoch": 0.0345489443378119, "grad_norm": 0.158203125, "learning_rate": 9.654510556621882e-06, "loss": 1.2888, "step": 36 }, { "epoch": 0.03550863723608445, "grad_norm": 0.189453125, "learning_rate": 9.644913627639156e-06, "loss": 1.4405, "step": 37 }, { "epoch": 0.036468330134357005, "grad_norm": 0.169921875, "learning_rate": 9.63531669865643e-06, "loss": 1.3298, "step": 38 }, { "epoch": 0.03742802303262956, "grad_norm": 0.1806640625, "learning_rate": 9.625719769673705e-06, "loss": 1.3527, "step": 39 }, { "epoch": 0.03838771593090211, "grad_norm": 0.193359375, "learning_rate": 9.61612284069098e-06, "loss": 1.3846, "step": 40 }, { "epoch": 0.03934740882917467, "grad_norm": 0.181640625, "learning_rate": 9.606525911708255e-06, "loss": 1.4143, "step": 41 }, { "epoch": 0.04030710172744722, "grad_norm": 0.197265625, "learning_rate": 9.59692898272553e-06, "loss": 1.3999, "step": 42 }, { "epoch": 0.04126679462571977, "grad_norm": 0.1865234375, "learning_rate": 9.587332053742802e-06, "loss": 1.3248, "step": 43 }, { "epoch": 0.04222648752399232, "grad_norm": 0.1796875, "learning_rate": 9.577735124760078e-06, "loss": 1.2913, "step": 44 }, { "epoch": 0.04318618042226487, "grad_norm": 0.1884765625, "learning_rate": 9.568138195777352e-06, "loss": 1.3192, "step": 45 }, { "epoch": 0.044145873320537425, "grad_norm": 0.1923828125, "learning_rate": 9.558541266794627e-06, "loss": 1.3543, "step": 46 }, { "epoch": 0.045105566218809984, "grad_norm": 0.19140625, "learning_rate": 9.548944337811901e-06, "loss": 1.2851, "step": 47 }, { "epoch": 0.046065259117082535, "grad_norm": 0.201171875, "learning_rate": 9.539347408829175e-06, "loss": 1.3718, "step": 48 }, { "epoch": 0.04702495201535509, "grad_norm": 0.2021484375, "learning_rate": 9.52975047984645e-06, "loss": 1.4037, "step": 49 }, { "epoch": 0.04798464491362764, "grad_norm": 0.1923828125, "learning_rate": 9.520153550863724e-06, "loss": 1.2992, "step": 50 }, { "epoch": 0.04894433781190019, "grad_norm": 0.1943359375, "learning_rate": 9.510556621880998e-06, "loss": 1.3151, "step": 51 }, { "epoch": 0.04990403071017274, "grad_norm": 0.1923828125, "learning_rate": 9.500959692898273e-06, "loss": 1.2815, "step": 52 }, { "epoch": 0.0508637236084453, "grad_norm": 0.19921875, "learning_rate": 9.491362763915549e-06, "loss": 1.3421, "step": 53 }, { "epoch": 0.05182341650671785, "grad_norm": 0.1923828125, "learning_rate": 9.481765834932823e-06, "loss": 1.3319, "step": 54 }, { "epoch": 0.052783109404990404, "grad_norm": 0.1787109375, "learning_rate": 9.472168905950097e-06, "loss": 1.2811, "step": 55 }, { "epoch": 0.053742802303262956, "grad_norm": 0.1982421875, "learning_rate": 9.46257197696737e-06, "loss": 1.3407, "step": 56 }, { "epoch": 0.05470249520153551, "grad_norm": 0.181640625, "learning_rate": 9.452975047984646e-06, "loss": 1.1884, "step": 57 }, { "epoch": 0.05566218809980806, "grad_norm": 0.1904296875, "learning_rate": 9.44337811900192e-06, "loss": 1.2598, "step": 58 }, { "epoch": 0.05662188099808062, "grad_norm": 0.193359375, "learning_rate": 9.433781190019195e-06, "loss": 1.3258, "step": 59 }, { "epoch": 0.05758157389635317, "grad_norm": 0.2060546875, "learning_rate": 9.424184261036469e-06, "loss": 1.3675, "step": 60 }, { "epoch": 0.05854126679462572, "grad_norm": 0.1884765625, "learning_rate": 9.414587332053743e-06, "loss": 1.2389, "step": 61 }, { "epoch": 0.05950095969289827, "grad_norm": 0.1826171875, "learning_rate": 9.404990403071018e-06, "loss": 1.281, "step": 62 }, { "epoch": 0.060460652591170824, "grad_norm": 0.1904296875, "learning_rate": 9.395393474088292e-06, "loss": 1.3178, "step": 63 }, { "epoch": 0.061420345489443376, "grad_norm": 0.1708984375, "learning_rate": 9.385796545105566e-06, "loss": 1.215, "step": 64 }, { "epoch": 0.06238003838771593, "grad_norm": 0.1669921875, "learning_rate": 9.376199616122842e-06, "loss": 1.1806, "step": 65 }, { "epoch": 0.06333973128598848, "grad_norm": 0.1767578125, "learning_rate": 9.366602687140117e-06, "loss": 1.1896, "step": 66 }, { "epoch": 0.06429942418426103, "grad_norm": 0.1728515625, "learning_rate": 9.357005758157391e-06, "loss": 1.1949, "step": 67 }, { "epoch": 0.06525911708253358, "grad_norm": 0.21484375, "learning_rate": 9.347408829174665e-06, "loss": 1.3085, "step": 68 }, { "epoch": 0.06621880998080615, "grad_norm": 0.173828125, "learning_rate": 9.33781190019194e-06, "loss": 1.1966, "step": 69 }, { "epoch": 0.0671785028790787, "grad_norm": 0.1787109375, "learning_rate": 9.328214971209214e-06, "loss": 1.2213, "step": 70 }, { "epoch": 0.06813819577735125, "grad_norm": 0.17578125, "learning_rate": 9.318618042226488e-06, "loss": 1.2269, "step": 71 }, { "epoch": 0.0690978886756238, "grad_norm": 0.17578125, "learning_rate": 9.309021113243763e-06, "loss": 1.2402, "step": 72 }, { "epoch": 0.07005758157389635, "grad_norm": 0.1787109375, "learning_rate": 9.299424184261039e-06, "loss": 1.2418, "step": 73 }, { "epoch": 0.0710172744721689, "grad_norm": 0.166015625, "learning_rate": 9.289827255278311e-06, "loss": 1.1873, "step": 74 }, { "epoch": 0.07197696737044146, "grad_norm": 0.185546875, "learning_rate": 9.280230326295585e-06, "loss": 1.2642, "step": 75 }, { "epoch": 0.07293666026871401, "grad_norm": 0.177734375, "learning_rate": 9.27063339731286e-06, "loss": 1.2479, "step": 76 }, { "epoch": 0.07389635316698656, "grad_norm": 0.16015625, "learning_rate": 9.261036468330134e-06, "loss": 1.178, "step": 77 }, { "epoch": 0.07485604606525911, "grad_norm": 0.1552734375, "learning_rate": 9.25143953934741e-06, "loss": 1.1754, "step": 78 }, { "epoch": 0.07581573896353166, "grad_norm": 0.1611328125, "learning_rate": 9.241842610364684e-06, "loss": 1.2165, "step": 79 }, { "epoch": 0.07677543186180422, "grad_norm": 0.16796875, "learning_rate": 9.232245681381959e-06, "loss": 1.1865, "step": 80 }, { "epoch": 0.07773512476007678, "grad_norm": 0.1748046875, "learning_rate": 9.222648752399233e-06, "loss": 1.2512, "step": 81 }, { "epoch": 0.07869481765834933, "grad_norm": 0.240234375, "learning_rate": 9.213051823416507e-06, "loss": 1.4117, "step": 82 }, { "epoch": 0.07965451055662189, "grad_norm": 0.1455078125, "learning_rate": 9.203454894433782e-06, "loss": 1.1267, "step": 83 }, { "epoch": 0.08061420345489444, "grad_norm": 0.166015625, "learning_rate": 9.193857965451056e-06, "loss": 1.2428, "step": 84 }, { "epoch": 0.08157389635316699, "grad_norm": 0.16796875, "learning_rate": 9.18426103646833e-06, "loss": 1.1984, "step": 85 }, { "epoch": 0.08253358925143954, "grad_norm": 0.150390625, "learning_rate": 9.174664107485606e-06, "loss": 1.1621, "step": 86 }, { "epoch": 0.08349328214971209, "grad_norm": 0.14453125, "learning_rate": 9.16506717850288e-06, "loss": 1.1578, "step": 87 }, { "epoch": 0.08445297504798464, "grad_norm": 0.146484375, "learning_rate": 9.155470249520153e-06, "loss": 1.1536, "step": 88 }, { "epoch": 0.0854126679462572, "grad_norm": 0.173828125, "learning_rate": 9.145873320537428e-06, "loss": 1.2703, "step": 89 }, { "epoch": 0.08637236084452975, "grad_norm": 0.158203125, "learning_rate": 9.136276391554704e-06, "loss": 1.1967, "step": 90 }, { "epoch": 0.0873320537428023, "grad_norm": 0.23828125, "learning_rate": 9.126679462571978e-06, "loss": 1.4335, "step": 91 }, { "epoch": 0.08829174664107485, "grad_norm": 0.1513671875, "learning_rate": 9.117082533589252e-06, "loss": 1.1648, "step": 92 }, { "epoch": 0.0892514395393474, "grad_norm": 0.140625, "learning_rate": 9.107485604606527e-06, "loss": 1.1267, "step": 93 }, { "epoch": 0.09021113243761997, "grad_norm": 0.1259765625, "learning_rate": 9.097888675623801e-06, "loss": 1.1272, "step": 94 }, { "epoch": 0.09117082533589252, "grad_norm": 0.138671875, "learning_rate": 9.088291746641075e-06, "loss": 1.158, "step": 95 }, { "epoch": 0.09213051823416507, "grad_norm": 0.1484375, "learning_rate": 9.07869481765835e-06, "loss": 1.2036, "step": 96 }, { "epoch": 0.09309021113243762, "grad_norm": 0.1279296875, "learning_rate": 9.069097888675624e-06, "loss": 1.1793, "step": 97 }, { "epoch": 0.09404990403071017, "grad_norm": 0.1357421875, "learning_rate": 9.0595009596929e-06, "loss": 1.1281, "step": 98 }, { "epoch": 0.09500959692898273, "grad_norm": 0.1376953125, "learning_rate": 9.049904030710174e-06, "loss": 1.145, "step": 99 }, { "epoch": 0.09596928982725528, "grad_norm": 0.12451171875, "learning_rate": 9.040307101727449e-06, "loss": 1.0348, "step": 100 }, { "epoch": 0.09692898272552783, "grad_norm": 0.13671875, "learning_rate": 9.030710172744721e-06, "loss": 1.1187, "step": 101 }, { "epoch": 0.09788867562380038, "grad_norm": 0.150390625, "learning_rate": 9.021113243761996e-06, "loss": 1.2216, "step": 102 }, { "epoch": 0.09884836852207293, "grad_norm": 0.2060546875, "learning_rate": 9.011516314779272e-06, "loss": 1.136, "step": 103 }, { "epoch": 0.09980806142034548, "grad_norm": 0.1337890625, "learning_rate": 9.001919385796546e-06, "loss": 1.0826, "step": 104 }, { "epoch": 0.10076775431861804, "grad_norm": 0.1435546875, "learning_rate": 8.99232245681382e-06, "loss": 1.1926, "step": 105 }, { "epoch": 0.1017274472168906, "grad_norm": 0.1396484375, "learning_rate": 8.982725527831095e-06, "loss": 1.1645, "step": 106 }, { "epoch": 0.10268714011516315, "grad_norm": 0.1298828125, "learning_rate": 8.973128598848369e-06, "loss": 1.1451, "step": 107 }, { "epoch": 0.1036468330134357, "grad_norm": 0.12890625, "learning_rate": 8.963531669865643e-06, "loss": 1.1533, "step": 108 }, { "epoch": 0.10460652591170826, "grad_norm": 0.11474609375, "learning_rate": 8.953934740882918e-06, "loss": 1.1095, "step": 109 }, { "epoch": 0.10556621880998081, "grad_norm": 0.1484375, "learning_rate": 8.944337811900192e-06, "loss": 1.0983, "step": 110 }, { "epoch": 0.10652591170825336, "grad_norm": 0.1162109375, "learning_rate": 8.934740882917468e-06, "loss": 1.0713, "step": 111 }, { "epoch": 0.10748560460652591, "grad_norm": 0.1337890625, "learning_rate": 8.925143953934742e-06, "loss": 1.1187, "step": 112 }, { "epoch": 0.10844529750479846, "grad_norm": 0.11767578125, "learning_rate": 8.915547024952017e-06, "loss": 1.0156, "step": 113 }, { "epoch": 0.10940499040307101, "grad_norm": 0.12060546875, "learning_rate": 8.905950095969291e-06, "loss": 1.1281, "step": 114 }, { "epoch": 0.11036468330134357, "grad_norm": 0.12451171875, "learning_rate": 8.896353166986565e-06, "loss": 1.1185, "step": 115 }, { "epoch": 0.11132437619961612, "grad_norm": 0.1552734375, "learning_rate": 8.88675623800384e-06, "loss": 1.1278, "step": 116 }, { "epoch": 0.11228406909788867, "grad_norm": 0.1279296875, "learning_rate": 8.877159309021114e-06, "loss": 1.0986, "step": 117 }, { "epoch": 0.11324376199616124, "grad_norm": 0.10693359375, "learning_rate": 8.867562380038388e-06, "loss": 1.0399, "step": 118 }, { "epoch": 0.11420345489443379, "grad_norm": 0.1220703125, "learning_rate": 8.857965451055663e-06, "loss": 1.1677, "step": 119 }, { "epoch": 0.11516314779270634, "grad_norm": 0.1259765625, "learning_rate": 8.848368522072937e-06, "loss": 1.1291, "step": 120 }, { "epoch": 0.11612284069097889, "grad_norm": 0.111328125, "learning_rate": 8.838771593090211e-06, "loss": 1.0135, "step": 121 }, { "epoch": 0.11708253358925144, "grad_norm": 0.1103515625, "learning_rate": 8.829174664107486e-06, "loss": 1.1206, "step": 122 }, { "epoch": 0.118042226487524, "grad_norm": 0.1240234375, "learning_rate": 8.819577735124762e-06, "loss": 1.0963, "step": 123 }, { "epoch": 0.11900191938579655, "grad_norm": 0.119140625, "learning_rate": 8.809980806142036e-06, "loss": 1.1072, "step": 124 }, { "epoch": 0.1199616122840691, "grad_norm": 0.12060546875, "learning_rate": 8.80038387715931e-06, "loss": 1.1213, "step": 125 }, { "epoch": 0.12092130518234165, "grad_norm": 0.10400390625, "learning_rate": 8.790786948176585e-06, "loss": 1.0651, "step": 126 }, { "epoch": 0.1218809980806142, "grad_norm": 0.11572265625, "learning_rate": 8.781190019193859e-06, "loss": 1.056, "step": 127 }, { "epoch": 0.12284069097888675, "grad_norm": 0.11767578125, "learning_rate": 8.771593090211133e-06, "loss": 1.082, "step": 128 }, { "epoch": 0.1238003838771593, "grad_norm": 0.1533203125, "learning_rate": 8.761996161228408e-06, "loss": 1.2184, "step": 129 }, { "epoch": 0.12476007677543186, "grad_norm": 0.193359375, "learning_rate": 8.752399232245682e-06, "loss": 1.0084, "step": 130 }, { "epoch": 0.1257197696737044, "grad_norm": 0.1298828125, "learning_rate": 8.742802303262956e-06, "loss": 1.1232, "step": 131 }, { "epoch": 0.12667946257197696, "grad_norm": 0.1044921875, "learning_rate": 8.73320537428023e-06, "loss": 1.0711, "step": 132 }, { "epoch": 0.1276391554702495, "grad_norm": 0.1513671875, "learning_rate": 8.723608445297505e-06, "loss": 1.2007, "step": 133 }, { "epoch": 0.12859884836852206, "grad_norm": 0.11767578125, "learning_rate": 8.71401151631478e-06, "loss": 1.1375, "step": 134 }, { "epoch": 0.1295585412667946, "grad_norm": 0.12109375, "learning_rate": 8.704414587332054e-06, "loss": 1.0473, "step": 135 }, { "epoch": 0.13051823416506717, "grad_norm": 0.1240234375, "learning_rate": 8.69481765834933e-06, "loss": 1.1487, "step": 136 }, { "epoch": 0.13147792706333974, "grad_norm": 0.099609375, "learning_rate": 8.685220729366604e-06, "loss": 1.072, "step": 137 }, { "epoch": 0.1324376199616123, "grad_norm": 0.107421875, "learning_rate": 8.675623800383878e-06, "loss": 1.0678, "step": 138 }, { "epoch": 0.13339731285988485, "grad_norm": 0.10205078125, "learning_rate": 8.666026871401153e-06, "loss": 1.1066, "step": 139 }, { "epoch": 0.1343570057581574, "grad_norm": 0.1220703125, "learning_rate": 8.656429942418427e-06, "loss": 1.1308, "step": 140 }, { "epoch": 0.13531669865642995, "grad_norm": 0.1220703125, "learning_rate": 8.646833013435701e-06, "loss": 1.0877, "step": 141 }, { "epoch": 0.1362763915547025, "grad_norm": 0.1083984375, "learning_rate": 8.637236084452976e-06, "loss": 1.0643, "step": 142 }, { "epoch": 0.13723608445297505, "grad_norm": 0.1015625, "learning_rate": 8.62763915547025e-06, "loss": 0.9934, "step": 143 }, { "epoch": 0.1381957773512476, "grad_norm": 0.12109375, "learning_rate": 8.618042226487526e-06, "loss": 1.1454, "step": 144 }, { "epoch": 0.13915547024952016, "grad_norm": 0.10595703125, "learning_rate": 8.6084452975048e-06, "loss": 1.0282, "step": 145 }, { "epoch": 0.1401151631477927, "grad_norm": 0.1865234375, "learning_rate": 8.598848368522073e-06, "loss": 1.2197, "step": 146 }, { "epoch": 0.14107485604606526, "grad_norm": 0.11328125, "learning_rate": 8.589251439539347e-06, "loss": 1.0509, "step": 147 }, { "epoch": 0.1420345489443378, "grad_norm": 0.10693359375, "learning_rate": 8.579654510556623e-06, "loss": 1.0721, "step": 148 }, { "epoch": 0.14299424184261036, "grad_norm": 0.119140625, "learning_rate": 8.570057581573898e-06, "loss": 1.1034, "step": 149 }, { "epoch": 0.14395393474088292, "grad_norm": 0.1162109375, "learning_rate": 8.560460652591172e-06, "loss": 1.0192, "step": 150 }, { "epoch": 0.14491362763915547, "grad_norm": 0.11767578125, "learning_rate": 8.550863723608446e-06, "loss": 1.0542, "step": 151 }, { "epoch": 0.14587332053742802, "grad_norm": 0.107421875, "learning_rate": 8.54126679462572e-06, "loss": 1.0556, "step": 152 }, { "epoch": 0.14683301343570057, "grad_norm": 0.10595703125, "learning_rate": 8.531669865642995e-06, "loss": 1.0191, "step": 153 }, { "epoch": 0.14779270633397312, "grad_norm": 0.1064453125, "learning_rate": 8.522072936660269e-06, "loss": 0.9925, "step": 154 }, { "epoch": 0.14875239923224567, "grad_norm": 0.1591796875, "learning_rate": 8.512476007677543e-06, "loss": 1.1719, "step": 155 }, { "epoch": 0.14971209213051823, "grad_norm": 0.11181640625, "learning_rate": 8.502879078694818e-06, "loss": 1.1286, "step": 156 }, { "epoch": 0.15067178502879078, "grad_norm": 0.12255859375, "learning_rate": 8.493282149712094e-06, "loss": 1.1383, "step": 157 }, { "epoch": 0.15163147792706333, "grad_norm": 0.1611328125, "learning_rate": 8.483685220729368e-06, "loss": 1.2015, "step": 158 }, { "epoch": 0.15259117082533588, "grad_norm": 0.11767578125, "learning_rate": 8.47408829174664e-06, "loss": 1.0849, "step": 159 }, { "epoch": 0.15355086372360843, "grad_norm": 0.09375, "learning_rate": 8.464491362763915e-06, "loss": 0.944, "step": 160 }, { "epoch": 0.15451055662188098, "grad_norm": 0.12353515625, "learning_rate": 8.454894433781191e-06, "loss": 1.0966, "step": 161 }, { "epoch": 0.15547024952015356, "grad_norm": 0.10498046875, "learning_rate": 8.445297504798465e-06, "loss": 0.9607, "step": 162 }, { "epoch": 0.15642994241842612, "grad_norm": 0.11669921875, "learning_rate": 8.43570057581574e-06, "loss": 1.1095, "step": 163 }, { "epoch": 0.15738963531669867, "grad_norm": 0.10205078125, "learning_rate": 8.426103646833014e-06, "loss": 1.0191, "step": 164 }, { "epoch": 0.15834932821497122, "grad_norm": 0.11083984375, "learning_rate": 8.416506717850288e-06, "loss": 1.0218, "step": 165 }, { "epoch": 0.15930902111324377, "grad_norm": 0.0966796875, "learning_rate": 8.406909788867563e-06, "loss": 0.9931, "step": 166 }, { "epoch": 0.16026871401151632, "grad_norm": 0.091796875, "learning_rate": 8.397312859884837e-06, "loss": 0.9998, "step": 167 }, { "epoch": 0.16122840690978887, "grad_norm": 0.10107421875, "learning_rate": 8.387715930902111e-06, "loss": 1.0155, "step": 168 }, { "epoch": 0.16218809980806143, "grad_norm": 0.115234375, "learning_rate": 8.378119001919387e-06, "loss": 0.9811, "step": 169 }, { "epoch": 0.16314779270633398, "grad_norm": 0.11376953125, "learning_rate": 8.368522072936662e-06, "loss": 0.9923, "step": 170 }, { "epoch": 0.16410748560460653, "grad_norm": 0.11083984375, "learning_rate": 8.358925143953936e-06, "loss": 1.0559, "step": 171 }, { "epoch": 0.16506717850287908, "grad_norm": 0.1083984375, "learning_rate": 8.34932821497121e-06, "loss": 1.0016, "step": 172 }, { "epoch": 0.16602687140115163, "grad_norm": 0.1025390625, "learning_rate": 8.339731285988485e-06, "loss": 0.9755, "step": 173 }, { "epoch": 0.16698656429942418, "grad_norm": 0.09423828125, "learning_rate": 8.330134357005759e-06, "loss": 1.0032, "step": 174 }, { "epoch": 0.16794625719769674, "grad_norm": 0.1318359375, "learning_rate": 8.320537428023033e-06, "loss": 1.1055, "step": 175 }, { "epoch": 0.1689059500959693, "grad_norm": 0.11865234375, "learning_rate": 8.310940499040308e-06, "loss": 1.0221, "step": 176 }, { "epoch": 0.16986564299424184, "grad_norm": 0.10986328125, "learning_rate": 8.301343570057582e-06, "loss": 1.0432, "step": 177 }, { "epoch": 0.1708253358925144, "grad_norm": 0.1552734375, "learning_rate": 8.291746641074856e-06, "loss": 1.0867, "step": 178 }, { "epoch": 0.17178502879078694, "grad_norm": 0.1083984375, "learning_rate": 8.28214971209213e-06, "loss": 1.0279, "step": 179 }, { "epoch": 0.1727447216890595, "grad_norm": 0.10888671875, "learning_rate": 8.272552783109405e-06, "loss": 1.0319, "step": 180 }, { "epoch": 0.17370441458733205, "grad_norm": 0.0966796875, "learning_rate": 8.26295585412668e-06, "loss": 1.0082, "step": 181 }, { "epoch": 0.1746641074856046, "grad_norm": 0.1025390625, "learning_rate": 8.253358925143955e-06, "loss": 0.9761, "step": 182 }, { "epoch": 0.17562380038387715, "grad_norm": 0.09765625, "learning_rate": 8.24376199616123e-06, "loss": 0.9492, "step": 183 }, { "epoch": 0.1765834932821497, "grad_norm": 0.1142578125, "learning_rate": 8.234165067178504e-06, "loss": 1.0052, "step": 184 }, { "epoch": 0.17754318618042225, "grad_norm": 0.1083984375, "learning_rate": 8.224568138195778e-06, "loss": 0.9921, "step": 185 }, { "epoch": 0.1785028790786948, "grad_norm": 0.1328125, "learning_rate": 8.214971209213053e-06, "loss": 1.0567, "step": 186 }, { "epoch": 0.17946257197696738, "grad_norm": 0.1181640625, "learning_rate": 8.205374280230327e-06, "loss": 0.9777, "step": 187 }, { "epoch": 0.18042226487523993, "grad_norm": 0.0966796875, "learning_rate": 8.195777351247601e-06, "loss": 1.0348, "step": 188 }, { "epoch": 0.1813819577735125, "grad_norm": 0.0966796875, "learning_rate": 8.186180422264876e-06, "loss": 0.9495, "step": 189 }, { "epoch": 0.18234165067178504, "grad_norm": 0.10595703125, "learning_rate": 8.176583493282152e-06, "loss": 1.0123, "step": 190 }, { "epoch": 0.1833013435700576, "grad_norm": 0.14453125, "learning_rate": 8.166986564299424e-06, "loss": 1.1257, "step": 191 }, { "epoch": 0.18426103646833014, "grad_norm": 0.166015625, "learning_rate": 8.157389635316699e-06, "loss": 1.1254, "step": 192 }, { "epoch": 0.1852207293666027, "grad_norm": 0.10205078125, "learning_rate": 8.147792706333973e-06, "loss": 1.0055, "step": 193 }, { "epoch": 0.18618042226487524, "grad_norm": 0.1064453125, "learning_rate": 8.138195777351249e-06, "loss": 1.0149, "step": 194 }, { "epoch": 0.1871401151631478, "grad_norm": 0.09619140625, "learning_rate": 8.128598848368523e-06, "loss": 0.9892, "step": 195 }, { "epoch": 0.18809980806142035, "grad_norm": 0.09130859375, "learning_rate": 8.119001919385798e-06, "loss": 0.9941, "step": 196 }, { "epoch": 0.1890595009596929, "grad_norm": 0.1298828125, "learning_rate": 8.109404990403072e-06, "loss": 1.0089, "step": 197 }, { "epoch": 0.19001919385796545, "grad_norm": 0.09619140625, "learning_rate": 8.099808061420346e-06, "loss": 0.9446, "step": 198 }, { "epoch": 0.190978886756238, "grad_norm": 0.1015625, "learning_rate": 8.09021113243762e-06, "loss": 0.9734, "step": 199 }, { "epoch": 0.19193857965451055, "grad_norm": 0.11279296875, "learning_rate": 8.080614203454895e-06, "loss": 1.0315, "step": 200 }, { "epoch": 0.1928982725527831, "grad_norm": 0.12890625, "learning_rate": 8.07101727447217e-06, "loss": 1.0699, "step": 201 }, { "epoch": 0.19385796545105566, "grad_norm": 0.1494140625, "learning_rate": 8.061420345489444e-06, "loss": 1.0639, "step": 202 }, { "epoch": 0.1948176583493282, "grad_norm": 0.10498046875, "learning_rate": 8.05182341650672e-06, "loss": 1.0076, "step": 203 }, { "epoch": 0.19577735124760076, "grad_norm": 0.08984375, "learning_rate": 8.042226487523992e-06, "loss": 0.9557, "step": 204 }, { "epoch": 0.1967370441458733, "grad_norm": 0.103515625, "learning_rate": 8.032629558541267e-06, "loss": 1.0174, "step": 205 }, { "epoch": 0.19769673704414586, "grad_norm": 0.10986328125, "learning_rate": 8.023032629558541e-06, "loss": 1.0005, "step": 206 }, { "epoch": 0.19865642994241842, "grad_norm": 0.126953125, "learning_rate": 8.013435700575817e-06, "loss": 1.0216, "step": 207 }, { "epoch": 0.19961612284069097, "grad_norm": 0.1064453125, "learning_rate": 8.003838771593091e-06, "loss": 1.0296, "step": 208 }, { "epoch": 0.20057581573896352, "grad_norm": 0.09619140625, "learning_rate": 7.994241842610366e-06, "loss": 0.9604, "step": 209 }, { "epoch": 0.20153550863723607, "grad_norm": 0.0947265625, "learning_rate": 7.98464491362764e-06, "loss": 0.9319, "step": 210 }, { "epoch": 0.20249520153550865, "grad_norm": 0.09423828125, "learning_rate": 7.975047984644914e-06, "loss": 0.959, "step": 211 }, { "epoch": 0.2034548944337812, "grad_norm": 0.1259765625, "learning_rate": 7.965451055662189e-06, "loss": 1.0316, "step": 212 }, { "epoch": 0.20441458733205375, "grad_norm": 0.11279296875, "learning_rate": 7.955854126679463e-06, "loss": 0.9858, "step": 213 }, { "epoch": 0.2053742802303263, "grad_norm": 0.1025390625, "learning_rate": 7.946257197696737e-06, "loss": 0.9937, "step": 214 }, { "epoch": 0.20633397312859886, "grad_norm": 0.09521484375, "learning_rate": 7.936660268714013e-06, "loss": 1.0125, "step": 215 }, { "epoch": 0.2072936660268714, "grad_norm": 0.111328125, "learning_rate": 7.927063339731288e-06, "loss": 1.0136, "step": 216 }, { "epoch": 0.20825335892514396, "grad_norm": 0.0888671875, "learning_rate": 7.91746641074856e-06, "loss": 0.9332, "step": 217 }, { "epoch": 0.2092130518234165, "grad_norm": 0.0908203125, "learning_rate": 7.907869481765835e-06, "loss": 0.9504, "step": 218 }, { "epoch": 0.21017274472168906, "grad_norm": 0.099609375, "learning_rate": 7.89827255278311e-06, "loss": 1.0072, "step": 219 }, { "epoch": 0.21113243761996162, "grad_norm": 0.0849609375, "learning_rate": 7.888675623800385e-06, "loss": 0.9329, "step": 220 }, { "epoch": 0.21209213051823417, "grad_norm": 0.11328125, "learning_rate": 7.87907869481766e-06, "loss": 0.9958, "step": 221 }, { "epoch": 0.21305182341650672, "grad_norm": 0.10009765625, "learning_rate": 7.869481765834934e-06, "loss": 1.0125, "step": 222 }, { "epoch": 0.21401151631477927, "grad_norm": 0.1015625, "learning_rate": 7.859884836852208e-06, "loss": 0.9808, "step": 223 }, { "epoch": 0.21497120921305182, "grad_norm": 0.1083984375, "learning_rate": 7.850287907869482e-06, "loss": 0.9976, "step": 224 }, { "epoch": 0.21593090211132437, "grad_norm": 0.11376953125, "learning_rate": 7.840690978886757e-06, "loss": 1.0249, "step": 225 }, { "epoch": 0.21689059500959693, "grad_norm": 0.1025390625, "learning_rate": 7.83109404990403e-06, "loss": 0.9899, "step": 226 }, { "epoch": 0.21785028790786948, "grad_norm": 0.1298828125, "learning_rate": 7.821497120921305e-06, "loss": 1.1065, "step": 227 }, { "epoch": 0.21880998080614203, "grad_norm": 0.1083984375, "learning_rate": 7.811900191938581e-06, "loss": 1.0697, "step": 228 }, { "epoch": 0.21976967370441458, "grad_norm": 0.1337890625, "learning_rate": 7.802303262955856e-06, "loss": 0.9401, "step": 229 }, { "epoch": 0.22072936660268713, "grad_norm": 0.119140625, "learning_rate": 7.79270633397313e-06, "loss": 1.052, "step": 230 }, { "epoch": 0.22168905950095968, "grad_norm": 0.1591796875, "learning_rate": 7.783109404990402e-06, "loss": 1.154, "step": 231 }, { "epoch": 0.22264875239923224, "grad_norm": 0.1376953125, "learning_rate": 7.773512476007678e-06, "loss": 1.0706, "step": 232 }, { "epoch": 0.2236084452975048, "grad_norm": 0.09375, "learning_rate": 7.763915547024953e-06, "loss": 0.9314, "step": 233 }, { "epoch": 0.22456813819577734, "grad_norm": 0.08056640625, "learning_rate": 7.754318618042227e-06, "loss": 0.9448, "step": 234 }, { "epoch": 0.2255278310940499, "grad_norm": 0.10107421875, "learning_rate": 7.744721689059501e-06, "loss": 1.0048, "step": 235 }, { "epoch": 0.22648752399232247, "grad_norm": 0.1103515625, "learning_rate": 7.735124760076776e-06, "loss": 1.0153, "step": 236 }, { "epoch": 0.22744721689059502, "grad_norm": 0.10302734375, "learning_rate": 7.72552783109405e-06, "loss": 0.921, "step": 237 }, { "epoch": 0.22840690978886757, "grad_norm": 0.08984375, "learning_rate": 7.715930902111324e-06, "loss": 0.8776, "step": 238 }, { "epoch": 0.22936660268714013, "grad_norm": 0.08544921875, "learning_rate": 7.706333973128599e-06, "loss": 0.9054, "step": 239 }, { "epoch": 0.23032629558541268, "grad_norm": 0.111328125, "learning_rate": 7.696737044145875e-06, "loss": 0.9415, "step": 240 }, { "epoch": 0.23128598848368523, "grad_norm": 0.1357421875, "learning_rate": 7.687140115163149e-06, "loss": 1.0546, "step": 241 }, { "epoch": 0.23224568138195778, "grad_norm": 0.1064453125, "learning_rate": 7.677543186180423e-06, "loss": 0.989, "step": 242 }, { "epoch": 0.23320537428023033, "grad_norm": 0.091796875, "learning_rate": 7.667946257197698e-06, "loss": 0.9687, "step": 243 }, { "epoch": 0.23416506717850288, "grad_norm": 0.09228515625, "learning_rate": 7.658349328214972e-06, "loss": 0.9352, "step": 244 }, { "epoch": 0.23512476007677544, "grad_norm": 0.0908203125, "learning_rate": 7.648752399232246e-06, "loss": 0.9592, "step": 245 }, { "epoch": 0.236084452975048, "grad_norm": 0.1025390625, "learning_rate": 7.63915547024952e-06, "loss": 1.0028, "step": 246 }, { "epoch": 0.23704414587332054, "grad_norm": 0.09375, "learning_rate": 7.629558541266795e-06, "loss": 0.9823, "step": 247 }, { "epoch": 0.2380038387715931, "grad_norm": 0.0966796875, "learning_rate": 7.61996161228407e-06, "loss": 0.9399, "step": 248 }, { "epoch": 0.23896353166986564, "grad_norm": 0.10546875, "learning_rate": 7.610364683301345e-06, "loss": 0.9947, "step": 249 }, { "epoch": 0.2399232245681382, "grad_norm": 0.09326171875, "learning_rate": 7.600767754318619e-06, "loss": 1.0125, "step": 250 }, { "epoch": 0.24088291746641075, "grad_norm": 0.0927734375, "learning_rate": 7.591170825335893e-06, "loss": 0.8915, "step": 251 }, { "epoch": 0.2418426103646833, "grad_norm": 0.08544921875, "learning_rate": 7.581573896353167e-06, "loss": 0.9549, "step": 252 }, { "epoch": 0.24280230326295585, "grad_norm": 0.099609375, "learning_rate": 7.571976967370443e-06, "loss": 0.9493, "step": 253 }, { "epoch": 0.2437619961612284, "grad_norm": 0.11279296875, "learning_rate": 7.562380038387716e-06, "loss": 0.9959, "step": 254 }, { "epoch": 0.24472168905950095, "grad_norm": 0.1044921875, "learning_rate": 7.5527831094049905e-06, "loss": 0.9799, "step": 255 }, { "epoch": 0.2456813819577735, "grad_norm": 0.1025390625, "learning_rate": 7.543186180422265e-06, "loss": 0.9498, "step": 256 }, { "epoch": 0.24664107485604606, "grad_norm": 0.10546875, "learning_rate": 7.53358925143954e-06, "loss": 0.8909, "step": 257 }, { "epoch": 0.2476007677543186, "grad_norm": 0.10302734375, "learning_rate": 7.523992322456814e-06, "loss": 0.9247, "step": 258 }, { "epoch": 0.24856046065259116, "grad_norm": 0.10400390625, "learning_rate": 7.514395393474089e-06, "loss": 0.9861, "step": 259 }, { "epoch": 0.2495201535508637, "grad_norm": 0.0888671875, "learning_rate": 7.504798464491363e-06, "loss": 0.949, "step": 260 }, { "epoch": 0.2504798464491363, "grad_norm": 0.1025390625, "learning_rate": 7.495201535508638e-06, "loss": 0.973, "step": 261 }, { "epoch": 0.2514395393474088, "grad_norm": 0.103515625, "learning_rate": 7.4856046065259125e-06, "loss": 0.9665, "step": 262 }, { "epoch": 0.2523992322456814, "grad_norm": 0.10400390625, "learning_rate": 7.476007677543187e-06, "loss": 0.9601, "step": 263 }, { "epoch": 0.2533589251439539, "grad_norm": 0.126953125, "learning_rate": 7.466410748560461e-06, "loss": 1.0331, "step": 264 }, { "epoch": 0.2543186180422265, "grad_norm": 0.109375, "learning_rate": 7.456813819577736e-06, "loss": 0.9773, "step": 265 }, { "epoch": 0.255278310940499, "grad_norm": 0.0927734375, "learning_rate": 7.447216890595011e-06, "loss": 0.9897, "step": 266 }, { "epoch": 0.2562380038387716, "grad_norm": 0.09814453125, "learning_rate": 7.437619961612285e-06, "loss": 0.8944, "step": 267 }, { "epoch": 0.2571976967370441, "grad_norm": 0.10791015625, "learning_rate": 7.4280230326295585e-06, "loss": 1.031, "step": 268 }, { "epoch": 0.2581573896353167, "grad_norm": 0.10888671875, "learning_rate": 7.4184261036468345e-06, "loss": 0.9817, "step": 269 }, { "epoch": 0.2591170825335892, "grad_norm": 0.107421875, "learning_rate": 7.408829174664108e-06, "loss": 0.999, "step": 270 }, { "epoch": 0.2600767754318618, "grad_norm": 0.095703125, "learning_rate": 7.399232245681382e-06, "loss": 0.9768, "step": 271 }, { "epoch": 0.26103646833013433, "grad_norm": 0.11376953125, "learning_rate": 7.389635316698657e-06, "loss": 1.0208, "step": 272 }, { "epoch": 0.2619961612284069, "grad_norm": 0.107421875, "learning_rate": 7.380038387715931e-06, "loss": 0.9654, "step": 273 }, { "epoch": 0.2629558541266795, "grad_norm": 0.1025390625, "learning_rate": 7.370441458733206e-06, "loss": 0.9188, "step": 274 }, { "epoch": 0.263915547024952, "grad_norm": 0.09912109375, "learning_rate": 7.3608445297504805e-06, "loss": 0.98, "step": 275 }, { "epoch": 0.2648752399232246, "grad_norm": 0.10595703125, "learning_rate": 7.351247600767755e-06, "loss": 0.9927, "step": 276 }, { "epoch": 0.2658349328214971, "grad_norm": 0.123046875, "learning_rate": 7.341650671785029e-06, "loss": 1.0048, "step": 277 }, { "epoch": 0.2667946257197697, "grad_norm": 0.10791015625, "learning_rate": 7.332053742802304e-06, "loss": 0.9771, "step": 278 }, { "epoch": 0.2677543186180422, "grad_norm": 0.115234375, "learning_rate": 7.322456813819579e-06, "loss": 1.0225, "step": 279 }, { "epoch": 0.2687140115163148, "grad_norm": 0.11767578125, "learning_rate": 7.312859884836853e-06, "loss": 0.9999, "step": 280 }, { "epoch": 0.2696737044145873, "grad_norm": 0.09521484375, "learning_rate": 7.3032629558541264e-06, "loss": 0.9115, "step": 281 }, { "epoch": 0.2706333973128599, "grad_norm": 0.0859375, "learning_rate": 7.2936660268714024e-06, "loss": 0.9408, "step": 282 }, { "epoch": 0.2715930902111324, "grad_norm": 0.095703125, "learning_rate": 7.284069097888676e-06, "loss": 0.8829, "step": 283 }, { "epoch": 0.272552783109405, "grad_norm": 0.1357421875, "learning_rate": 7.27447216890595e-06, "loss": 0.9513, "step": 284 }, { "epoch": 0.27351247600767753, "grad_norm": 0.09423828125, "learning_rate": 7.264875239923225e-06, "loss": 0.8901, "step": 285 }, { "epoch": 0.2744721689059501, "grad_norm": 0.09375, "learning_rate": 7.2552783109405e-06, "loss": 0.9344, "step": 286 }, { "epoch": 0.27543186180422263, "grad_norm": 0.09521484375, "learning_rate": 7.245681381957774e-06, "loss": 0.9692, "step": 287 }, { "epoch": 0.2763915547024952, "grad_norm": 0.09814453125, "learning_rate": 7.236084452975048e-06, "loss": 0.954, "step": 288 }, { "epoch": 0.27735124760076774, "grad_norm": 0.1181640625, "learning_rate": 7.226487523992323e-06, "loss": 1.0483, "step": 289 }, { "epoch": 0.2783109404990403, "grad_norm": 0.1083984375, "learning_rate": 7.216890595009598e-06, "loss": 1.0348, "step": 290 }, { "epoch": 0.27927063339731284, "grad_norm": 0.0966796875, "learning_rate": 7.207293666026872e-06, "loss": 0.9232, "step": 291 }, { "epoch": 0.2802303262955854, "grad_norm": 0.09228515625, "learning_rate": 7.1976967370441466e-06, "loss": 0.9508, "step": 292 }, { "epoch": 0.28119001919385794, "grad_norm": 0.10986328125, "learning_rate": 7.188099808061421e-06, "loss": 0.9915, "step": 293 }, { "epoch": 0.2821497120921305, "grad_norm": 0.0859375, "learning_rate": 7.178502879078696e-06, "loss": 0.8884, "step": 294 }, { "epoch": 0.28310940499040305, "grad_norm": 0.09326171875, "learning_rate": 7.16890595009597e-06, "loss": 0.9609, "step": 295 }, { "epoch": 0.2840690978886756, "grad_norm": 0.10205078125, "learning_rate": 7.159309021113245e-06, "loss": 0.9797, "step": 296 }, { "epoch": 0.28502879078694815, "grad_norm": 0.09521484375, "learning_rate": 7.149712092130518e-06, "loss": 0.9791, "step": 297 }, { "epoch": 0.28598848368522073, "grad_norm": 0.1064453125, "learning_rate": 7.1401151631477925e-06, "loss": 0.8977, "step": 298 }, { "epoch": 0.2869481765834933, "grad_norm": 0.10302734375, "learning_rate": 7.130518234165068e-06, "loss": 0.8998, "step": 299 }, { "epoch": 0.28790786948176583, "grad_norm": 0.1103515625, "learning_rate": 7.120921305182342e-06, "loss": 0.9845, "step": 300 }, { "epoch": 0.2888675623800384, "grad_norm": 0.107421875, "learning_rate": 7.111324376199616e-06, "loss": 0.967, "step": 301 }, { "epoch": 0.28982725527831094, "grad_norm": 0.11181640625, "learning_rate": 7.101727447216891e-06, "loss": 0.9695, "step": 302 }, { "epoch": 0.2907869481765835, "grad_norm": 0.109375, "learning_rate": 7.092130518234166e-06, "loss": 0.9427, "step": 303 }, { "epoch": 0.29174664107485604, "grad_norm": 0.109375, "learning_rate": 7.08253358925144e-06, "loss": 0.9487, "step": 304 }, { "epoch": 0.2927063339731286, "grad_norm": 0.1396484375, "learning_rate": 7.0729366602687145e-06, "loss": 0.9567, "step": 305 }, { "epoch": 0.29366602687140114, "grad_norm": 0.1494140625, "learning_rate": 7.063339731285989e-06, "loss": 1.039, "step": 306 }, { "epoch": 0.2946257197696737, "grad_norm": 0.103515625, "learning_rate": 7.053742802303264e-06, "loss": 0.9293, "step": 307 }, { "epoch": 0.29558541266794625, "grad_norm": 0.103515625, "learning_rate": 7.044145873320538e-06, "loss": 0.9293, "step": 308 }, { "epoch": 0.2965451055662188, "grad_norm": 0.11572265625, "learning_rate": 7.034548944337813e-06, "loss": 1.0326, "step": 309 }, { "epoch": 0.29750479846449135, "grad_norm": 0.09619140625, "learning_rate": 7.024952015355086e-06, "loss": 0.9228, "step": 310 }, { "epoch": 0.29846449136276393, "grad_norm": 0.1171875, "learning_rate": 7.015355086372362e-06, "loss": 1.0134, "step": 311 }, { "epoch": 0.29942418426103645, "grad_norm": 0.09521484375, "learning_rate": 7.005758157389636e-06, "loss": 0.8992, "step": 312 }, { "epoch": 0.30038387715930903, "grad_norm": 0.10400390625, "learning_rate": 6.99616122840691e-06, "loss": 0.9474, "step": 313 }, { "epoch": 0.30134357005758156, "grad_norm": 0.11767578125, "learning_rate": 6.986564299424184e-06, "loss": 0.9854, "step": 314 }, { "epoch": 0.30230326295585414, "grad_norm": 0.087890625, "learning_rate": 6.9769673704414595e-06, "loss": 0.9244, "step": 315 }, { "epoch": 0.30326295585412666, "grad_norm": 0.08740234375, "learning_rate": 6.967370441458734e-06, "loss": 0.9114, "step": 316 }, { "epoch": 0.30422264875239924, "grad_norm": 0.095703125, "learning_rate": 6.957773512476008e-06, "loss": 0.9245, "step": 317 }, { "epoch": 0.30518234165067176, "grad_norm": 0.09228515625, "learning_rate": 6.9481765834932824e-06, "loss": 0.9606, "step": 318 }, { "epoch": 0.30614203454894434, "grad_norm": 0.1318359375, "learning_rate": 6.938579654510558e-06, "loss": 1.056, "step": 319 }, { "epoch": 0.30710172744721687, "grad_norm": 0.11083984375, "learning_rate": 6.928982725527832e-06, "loss": 0.9928, "step": 320 }, { "epoch": 0.30806142034548945, "grad_norm": 0.09765625, "learning_rate": 6.919385796545106e-06, "loss": 0.9356, "step": 321 }, { "epoch": 0.30902111324376197, "grad_norm": 0.08837890625, "learning_rate": 6.909788867562381e-06, "loss": 0.8933, "step": 322 }, { "epoch": 0.30998080614203455, "grad_norm": 0.0927734375, "learning_rate": 6.900191938579655e-06, "loss": 0.9043, "step": 323 }, { "epoch": 0.31094049904030713, "grad_norm": 0.09765625, "learning_rate": 6.89059500959693e-06, "loss": 0.9646, "step": 324 }, { "epoch": 0.31190019193857965, "grad_norm": 0.10400390625, "learning_rate": 6.8809980806142044e-06, "loss": 0.9541, "step": 325 }, { "epoch": 0.31285988483685223, "grad_norm": 0.0908203125, "learning_rate": 6.871401151631478e-06, "loss": 0.901, "step": 326 }, { "epoch": 0.31381957773512476, "grad_norm": 0.103515625, "learning_rate": 6.861804222648752e-06, "loss": 0.9409, "step": 327 }, { "epoch": 0.31477927063339733, "grad_norm": 0.1259765625, "learning_rate": 6.852207293666027e-06, "loss": 1.0008, "step": 328 }, { "epoch": 0.31573896353166986, "grad_norm": 0.09423828125, "learning_rate": 6.842610364683302e-06, "loss": 0.9406, "step": 329 }, { "epoch": 0.31669865642994244, "grad_norm": 0.09912109375, "learning_rate": 6.833013435700576e-06, "loss": 0.9532, "step": 330 }, { "epoch": 0.31765834932821496, "grad_norm": 0.1083984375, "learning_rate": 6.82341650671785e-06, "loss": 0.9948, "step": 331 }, { "epoch": 0.31861804222648754, "grad_norm": 0.1220703125, "learning_rate": 6.8138195777351256e-06, "loss": 0.9763, "step": 332 }, { "epoch": 0.31957773512476007, "grad_norm": 0.09228515625, "learning_rate": 6.8042226487524e-06, "loss": 0.8708, "step": 333 }, { "epoch": 0.32053742802303264, "grad_norm": 0.0859375, "learning_rate": 6.794625719769674e-06, "loss": 0.922, "step": 334 }, { "epoch": 0.32149712092130517, "grad_norm": 0.0810546875, "learning_rate": 6.7850287907869485e-06, "loss": 0.9056, "step": 335 }, { "epoch": 0.32245681381957775, "grad_norm": 0.09716796875, "learning_rate": 6.775431861804224e-06, "loss": 0.9891, "step": 336 }, { "epoch": 0.32341650671785027, "grad_norm": 0.10400390625, "learning_rate": 6.765834932821498e-06, "loss": 0.9317, "step": 337 }, { "epoch": 0.32437619961612285, "grad_norm": 0.1123046875, "learning_rate": 6.756238003838772e-06, "loss": 1.0119, "step": 338 }, { "epoch": 0.3253358925143954, "grad_norm": 0.10498046875, "learning_rate": 6.746641074856046e-06, "loss": 0.9928, "step": 339 }, { "epoch": 0.32629558541266795, "grad_norm": 0.10986328125, "learning_rate": 6.737044145873322e-06, "loss": 1.0083, "step": 340 }, { "epoch": 0.3272552783109405, "grad_norm": 0.0966796875, "learning_rate": 6.727447216890595e-06, "loss": 0.9153, "step": 341 }, { "epoch": 0.32821497120921306, "grad_norm": 0.1025390625, "learning_rate": 6.71785028790787e-06, "loss": 0.9593, "step": 342 }, { "epoch": 0.3291746641074856, "grad_norm": 0.1064453125, "learning_rate": 6.708253358925144e-06, "loss": 0.9167, "step": 343 }, { "epoch": 0.33013435700575816, "grad_norm": 0.080078125, "learning_rate": 6.698656429942419e-06, "loss": 0.8708, "step": 344 }, { "epoch": 0.3310940499040307, "grad_norm": 0.0947265625, "learning_rate": 6.6890595009596935e-06, "loss": 0.9276, "step": 345 }, { "epoch": 0.33205374280230326, "grad_norm": 0.107421875, "learning_rate": 6.679462571976968e-06, "loss": 1.0077, "step": 346 }, { "epoch": 0.3330134357005758, "grad_norm": 0.11376953125, "learning_rate": 6.669865642994242e-06, "loss": 1.0066, "step": 347 }, { "epoch": 0.33397312859884837, "grad_norm": 0.10595703125, "learning_rate": 6.6602687140115165e-06, "loss": 0.8746, "step": 348 }, { "epoch": 0.33493282149712095, "grad_norm": 0.1044921875, "learning_rate": 6.650671785028792e-06, "loss": 0.9443, "step": 349 }, { "epoch": 0.33589251439539347, "grad_norm": 0.0888671875, "learning_rate": 6.641074856046066e-06, "loss": 0.9567, "step": 350 }, { "epoch": 0.33685220729366605, "grad_norm": 0.1376953125, "learning_rate": 6.63147792706334e-06, "loss": 1.1195, "step": 351 }, { "epoch": 0.3378119001919386, "grad_norm": 0.0830078125, "learning_rate": 6.621880998080615e-06, "loss": 0.9184, "step": 352 }, { "epoch": 0.33877159309021115, "grad_norm": 0.0888671875, "learning_rate": 6.61228406909789e-06, "loss": 0.9047, "step": 353 }, { "epoch": 0.3397312859884837, "grad_norm": 0.0703125, "learning_rate": 6.602687140115164e-06, "loss": 0.8174, "step": 354 }, { "epoch": 0.34069097888675626, "grad_norm": 0.0927734375, "learning_rate": 6.593090211132438e-06, "loss": 0.894, "step": 355 }, { "epoch": 0.3416506717850288, "grad_norm": 0.09521484375, "learning_rate": 6.583493282149712e-06, "loss": 0.9048, "step": 356 }, { "epoch": 0.34261036468330136, "grad_norm": 0.1015625, "learning_rate": 6.573896353166987e-06, "loss": 0.9497, "step": 357 }, { "epoch": 0.3435700575815739, "grad_norm": 0.103515625, "learning_rate": 6.5642994241842614e-06, "loss": 0.9741, "step": 358 }, { "epoch": 0.34452975047984646, "grad_norm": 0.109375, "learning_rate": 6.554702495201536e-06, "loss": 0.9669, "step": 359 }, { "epoch": 0.345489443378119, "grad_norm": 0.107421875, "learning_rate": 6.54510556621881e-06, "loss": 0.9659, "step": 360 }, { "epoch": 0.34644913627639157, "grad_norm": 0.076171875, "learning_rate": 6.535508637236085e-06, "loss": 0.8867, "step": 361 }, { "epoch": 0.3474088291746641, "grad_norm": 0.08984375, "learning_rate": 6.52591170825336e-06, "loss": 0.9019, "step": 362 }, { "epoch": 0.34836852207293667, "grad_norm": 0.103515625, "learning_rate": 6.516314779270634e-06, "loss": 0.8726, "step": 363 }, { "epoch": 0.3493282149712092, "grad_norm": 0.08740234375, "learning_rate": 6.506717850287908e-06, "loss": 0.895, "step": 364 }, { "epoch": 0.3502879078694818, "grad_norm": 0.103515625, "learning_rate": 6.497120921305183e-06, "loss": 0.8961, "step": 365 }, { "epoch": 0.3512476007677543, "grad_norm": 0.08203125, "learning_rate": 6.487523992322458e-06, "loss": 0.889, "step": 366 }, { "epoch": 0.3522072936660269, "grad_norm": 0.0908203125, "learning_rate": 6.477927063339732e-06, "loss": 0.8605, "step": 367 }, { "epoch": 0.3531669865642994, "grad_norm": 0.11865234375, "learning_rate": 6.4683301343570056e-06, "loss": 0.9347, "step": 368 }, { "epoch": 0.354126679462572, "grad_norm": 0.1455078125, "learning_rate": 6.4587332053742816e-06, "loss": 1.0203, "step": 369 }, { "epoch": 0.3550863723608445, "grad_norm": 0.115234375, "learning_rate": 6.449136276391556e-06, "loss": 1.0568, "step": 370 }, { "epoch": 0.3560460652591171, "grad_norm": 0.12890625, "learning_rate": 6.439539347408829e-06, "loss": 1.0349, "step": 371 }, { "epoch": 0.3570057581573896, "grad_norm": 0.08251953125, "learning_rate": 6.429942418426104e-06, "loss": 0.8929, "step": 372 }, { "epoch": 0.3579654510556622, "grad_norm": 0.1572265625, "learning_rate": 6.420345489443378e-06, "loss": 1.0427, "step": 373 }, { "epoch": 0.35892514395393477, "grad_norm": 0.09521484375, "learning_rate": 6.410748560460653e-06, "loss": 0.8672, "step": 374 }, { "epoch": 0.3598848368522073, "grad_norm": 0.12255859375, "learning_rate": 6.4011516314779275e-06, "loss": 0.8907, "step": 375 }, { "epoch": 0.36084452975047987, "grad_norm": 0.09033203125, "learning_rate": 6.391554702495202e-06, "loss": 0.9077, "step": 376 }, { "epoch": 0.3618042226487524, "grad_norm": 0.0791015625, "learning_rate": 6.381957773512476e-06, "loss": 0.8758, "step": 377 }, { "epoch": 0.362763915547025, "grad_norm": 0.1005859375, "learning_rate": 6.372360844529751e-06, "loss": 0.8745, "step": 378 }, { "epoch": 0.3637236084452975, "grad_norm": 0.0986328125, "learning_rate": 6.362763915547026e-06, "loss": 0.8887, "step": 379 }, { "epoch": 0.3646833013435701, "grad_norm": 0.08837890625, "learning_rate": 6.3531669865643e-06, "loss": 0.8776, "step": 380 }, { "epoch": 0.3656429942418426, "grad_norm": 0.08544921875, "learning_rate": 6.343570057581574e-06, "loss": 0.9301, "step": 381 }, { "epoch": 0.3666026871401152, "grad_norm": 0.08740234375, "learning_rate": 6.3339731285988495e-06, "loss": 0.8878, "step": 382 }, { "epoch": 0.3675623800383877, "grad_norm": 0.1279296875, "learning_rate": 6.324376199616124e-06, "loss": 0.8949, "step": 383 }, { "epoch": 0.3685220729366603, "grad_norm": 0.09375, "learning_rate": 6.314779270633397e-06, "loss": 0.9057, "step": 384 }, { "epoch": 0.3694817658349328, "grad_norm": 0.08935546875, "learning_rate": 6.305182341650672e-06, "loss": 0.8729, "step": 385 }, { "epoch": 0.3704414587332054, "grad_norm": 0.0869140625, "learning_rate": 6.295585412667947e-06, "loss": 0.9009, "step": 386 }, { "epoch": 0.3714011516314779, "grad_norm": 0.0869140625, "learning_rate": 6.285988483685221e-06, "loss": 0.8487, "step": 387 }, { "epoch": 0.3723608445297505, "grad_norm": 0.095703125, "learning_rate": 6.2763915547024955e-06, "loss": 0.862, "step": 388 }, { "epoch": 0.373320537428023, "grad_norm": 0.10205078125, "learning_rate": 6.26679462571977e-06, "loss": 0.936, "step": 389 }, { "epoch": 0.3742802303262956, "grad_norm": 0.0830078125, "learning_rate": 6.257197696737045e-06, "loss": 0.9083, "step": 390 }, { "epoch": 0.3752399232245681, "grad_norm": 0.10009765625, "learning_rate": 6.247600767754319e-06, "loss": 0.9089, "step": 391 }, { "epoch": 0.3761996161228407, "grad_norm": 0.16015625, "learning_rate": 6.238003838771594e-06, "loss": 1.1005, "step": 392 }, { "epoch": 0.3771593090211132, "grad_norm": 0.08935546875, "learning_rate": 6.228406909788868e-06, "loss": 0.8175, "step": 393 }, { "epoch": 0.3781190019193858, "grad_norm": 0.08740234375, "learning_rate": 6.218809980806143e-06, "loss": 0.8441, "step": 394 }, { "epoch": 0.3790786948176583, "grad_norm": 0.0947265625, "learning_rate": 6.2092130518234175e-06, "loss": 0.9371, "step": 395 }, { "epoch": 0.3800383877159309, "grad_norm": 0.10400390625, "learning_rate": 6.199616122840692e-06, "loss": 1.0151, "step": 396 }, { "epoch": 0.3809980806142035, "grad_norm": 0.130859375, "learning_rate": 6.190019193857965e-06, "loss": 0.9341, "step": 397 }, { "epoch": 0.381957773512476, "grad_norm": 0.0966796875, "learning_rate": 6.18042226487524e-06, "loss": 0.9144, "step": 398 }, { "epoch": 0.3829174664107486, "grad_norm": 0.0947265625, "learning_rate": 6.170825335892516e-06, "loss": 0.9452, "step": 399 }, { "epoch": 0.3838771593090211, "grad_norm": 0.1103515625, "learning_rate": 6.161228406909789e-06, "loss": 1.0361, "step": 400 }, { "epoch": 0.3848368522072937, "grad_norm": 0.10400390625, "learning_rate": 6.151631477927063e-06, "loss": 0.9116, "step": 401 }, { "epoch": 0.3857965451055662, "grad_norm": 0.11328125, "learning_rate": 6.142034548944338e-06, "loss": 0.8891, "step": 402 }, { "epoch": 0.3867562380038388, "grad_norm": 0.08447265625, "learning_rate": 6.132437619961613e-06, "loss": 0.8676, "step": 403 }, { "epoch": 0.3877159309021113, "grad_norm": 0.11083984375, "learning_rate": 6.122840690978887e-06, "loss": 0.9296, "step": 404 }, { "epoch": 0.3886756238003839, "grad_norm": 0.119140625, "learning_rate": 6.1132437619961616e-06, "loss": 0.9911, "step": 405 }, { "epoch": 0.3896353166986564, "grad_norm": 0.09423828125, "learning_rate": 6.103646833013436e-06, "loss": 0.9256, "step": 406 }, { "epoch": 0.390595009596929, "grad_norm": 0.083984375, "learning_rate": 6.094049904030711e-06, "loss": 0.8496, "step": 407 }, { "epoch": 0.3915547024952015, "grad_norm": 0.08642578125, "learning_rate": 6.084452975047985e-06, "loss": 0.8917, "step": 408 }, { "epoch": 0.3925143953934741, "grad_norm": 0.1005859375, "learning_rate": 6.07485604606526e-06, "loss": 0.9327, "step": 409 }, { "epoch": 0.3934740882917466, "grad_norm": 0.091796875, "learning_rate": 6.065259117082534e-06, "loss": 0.8882, "step": 410 }, { "epoch": 0.3944337811900192, "grad_norm": 0.09423828125, "learning_rate": 6.055662188099809e-06, "loss": 0.9488, "step": 411 }, { "epoch": 0.39539347408829173, "grad_norm": 0.09765625, "learning_rate": 6.0460652591170836e-06, "loss": 0.9024, "step": 412 }, { "epoch": 0.3963531669865643, "grad_norm": 0.1015625, "learning_rate": 6.036468330134357e-06, "loss": 0.9393, "step": 413 }, { "epoch": 0.39731285988483683, "grad_norm": 0.103515625, "learning_rate": 6.026871401151631e-06, "loss": 0.8741, "step": 414 }, { "epoch": 0.3982725527831094, "grad_norm": 0.103515625, "learning_rate": 6.0172744721689065e-06, "loss": 0.9609, "step": 415 }, { "epoch": 0.39923224568138194, "grad_norm": 0.0947265625, "learning_rate": 6.007677543186181e-06, "loss": 0.9116, "step": 416 }, { "epoch": 0.4001919385796545, "grad_norm": 0.109375, "learning_rate": 5.998080614203455e-06, "loss": 0.9607, "step": 417 }, { "epoch": 0.40115163147792704, "grad_norm": 0.10400390625, "learning_rate": 5.9884836852207295e-06, "loss": 0.9153, "step": 418 }, { "epoch": 0.4021113243761996, "grad_norm": 0.08837890625, "learning_rate": 5.978886756238005e-06, "loss": 0.8719, "step": 419 }, { "epoch": 0.40307101727447214, "grad_norm": 0.1015625, "learning_rate": 5.969289827255279e-06, "loss": 0.9437, "step": 420 }, { "epoch": 0.4040307101727447, "grad_norm": 0.0908203125, "learning_rate": 5.959692898272553e-06, "loss": 0.9401, "step": 421 }, { "epoch": 0.4049904030710173, "grad_norm": 0.115234375, "learning_rate": 5.950095969289828e-06, "loss": 0.9458, "step": 422 }, { "epoch": 0.4059500959692898, "grad_norm": 0.1552734375, "learning_rate": 5.940499040307102e-06, "loss": 1.0006, "step": 423 }, { "epoch": 0.4069097888675624, "grad_norm": 0.0888671875, "learning_rate": 5.930902111324377e-06, "loss": 0.8507, "step": 424 }, { "epoch": 0.40786948176583493, "grad_norm": 0.09423828125, "learning_rate": 5.9213051823416515e-06, "loss": 0.9471, "step": 425 }, { "epoch": 0.4088291746641075, "grad_norm": 0.10107421875, "learning_rate": 5.911708253358925e-06, "loss": 0.9143, "step": 426 }, { "epoch": 0.40978886756238003, "grad_norm": 0.08984375, "learning_rate": 5.902111324376199e-06, "loss": 0.9904, "step": 427 }, { "epoch": 0.4107485604606526, "grad_norm": 0.1015625, "learning_rate": 5.892514395393475e-06, "loss": 0.9509, "step": 428 }, { "epoch": 0.41170825335892514, "grad_norm": 0.10791015625, "learning_rate": 5.882917466410749e-06, "loss": 0.9433, "step": 429 }, { "epoch": 0.4126679462571977, "grad_norm": 0.095703125, "learning_rate": 5.873320537428023e-06, "loss": 0.9041, "step": 430 }, { "epoch": 0.41362763915547024, "grad_norm": 0.09423828125, "learning_rate": 5.8637236084452975e-06, "loss": 0.8894, "step": 431 }, { "epoch": 0.4145873320537428, "grad_norm": 0.11572265625, "learning_rate": 5.854126679462573e-06, "loss": 0.9713, "step": 432 }, { "epoch": 0.41554702495201534, "grad_norm": 0.0830078125, "learning_rate": 5.844529750479847e-06, "loss": 0.8676, "step": 433 }, { "epoch": 0.4165067178502879, "grad_norm": 0.08203125, "learning_rate": 5.834932821497121e-06, "loss": 0.9207, "step": 434 }, { "epoch": 0.41746641074856045, "grad_norm": 0.10205078125, "learning_rate": 5.825335892514396e-06, "loss": 1.011, "step": 435 }, { "epoch": 0.418426103646833, "grad_norm": 0.1318359375, "learning_rate": 5.815738963531671e-06, "loss": 0.9231, "step": 436 }, { "epoch": 0.41938579654510555, "grad_norm": 0.09619140625, "learning_rate": 5.806142034548945e-06, "loss": 0.941, "step": 437 }, { "epoch": 0.42034548944337813, "grad_norm": 0.2001953125, "learning_rate": 5.7965451055662194e-06, "loss": 1.0724, "step": 438 }, { "epoch": 0.42130518234165065, "grad_norm": 0.0888671875, "learning_rate": 5.786948176583494e-06, "loss": 0.9221, "step": 439 }, { "epoch": 0.42226487523992323, "grad_norm": 0.12060546875, "learning_rate": 5.777351247600769e-06, "loss": 1.0565, "step": 440 }, { "epoch": 0.42322456813819576, "grad_norm": 0.1083984375, "learning_rate": 5.767754318618043e-06, "loss": 0.984, "step": 441 }, { "epoch": 0.42418426103646834, "grad_norm": 0.09912109375, "learning_rate": 5.758157389635317e-06, "loss": 0.9393, "step": 442 }, { "epoch": 0.42514395393474086, "grad_norm": 0.111328125, "learning_rate": 5.748560460652591e-06, "loss": 0.9928, "step": 443 }, { "epoch": 0.42610364683301344, "grad_norm": 0.11376953125, "learning_rate": 5.738963531669866e-06, "loss": 1.0521, "step": 444 }, { "epoch": 0.42706333973128596, "grad_norm": 0.107421875, "learning_rate": 5.7293666026871406e-06, "loss": 0.9167, "step": 445 }, { "epoch": 0.42802303262955854, "grad_norm": 0.109375, "learning_rate": 5.719769673704415e-06, "loss": 0.959, "step": 446 }, { "epoch": 0.4289827255278311, "grad_norm": 0.10400390625, "learning_rate": 5.710172744721689e-06, "loss": 0.8617, "step": 447 }, { "epoch": 0.42994241842610365, "grad_norm": 0.0966796875, "learning_rate": 5.7005758157389635e-06, "loss": 0.9925, "step": 448 }, { "epoch": 0.4309021113243762, "grad_norm": 0.1259765625, "learning_rate": 5.690978886756239e-06, "loss": 1.026, "step": 449 }, { "epoch": 0.43186180422264875, "grad_norm": 0.08349609375, "learning_rate": 5.681381957773513e-06, "loss": 0.8937, "step": 450 }, { "epoch": 0.43282149712092133, "grad_norm": 0.09619140625, "learning_rate": 5.671785028790787e-06, "loss": 0.8959, "step": 451 }, { "epoch": 0.43378119001919385, "grad_norm": 0.119140625, "learning_rate": 5.662188099808062e-06, "loss": 0.9824, "step": 452 }, { "epoch": 0.43474088291746643, "grad_norm": 0.0986328125, "learning_rate": 5.652591170825337e-06, "loss": 0.912, "step": 453 }, { "epoch": 0.43570057581573896, "grad_norm": 0.10009765625, "learning_rate": 5.642994241842611e-06, "loss": 0.8847, "step": 454 }, { "epoch": 0.43666026871401153, "grad_norm": 0.09228515625, "learning_rate": 5.6333973128598855e-06, "loss": 0.8793, "step": 455 }, { "epoch": 0.43761996161228406, "grad_norm": 0.09326171875, "learning_rate": 5.623800383877159e-06, "loss": 0.8788, "step": 456 }, { "epoch": 0.43857965451055664, "grad_norm": 0.08544921875, "learning_rate": 5.614203454894435e-06, "loss": 0.9116, "step": 457 }, { "epoch": 0.43953934740882916, "grad_norm": 0.103515625, "learning_rate": 5.6046065259117085e-06, "loss": 0.8685, "step": 458 }, { "epoch": 0.44049904030710174, "grad_norm": 0.0859375, "learning_rate": 5.595009596928983e-06, "loss": 0.8572, "step": 459 }, { "epoch": 0.44145873320537427, "grad_norm": 0.09716796875, "learning_rate": 5.585412667946257e-06, "loss": 0.8928, "step": 460 }, { "epoch": 0.44241842610364684, "grad_norm": 0.1943359375, "learning_rate": 5.575815738963532e-06, "loss": 0.8916, "step": 461 }, { "epoch": 0.44337811900191937, "grad_norm": 0.0869140625, "learning_rate": 5.566218809980807e-06, "loss": 0.8626, "step": 462 }, { "epoch": 0.44433781190019195, "grad_norm": 0.10693359375, "learning_rate": 5.556621880998081e-06, "loss": 0.9158, "step": 463 }, { "epoch": 0.44529750479846447, "grad_norm": 0.10888671875, "learning_rate": 5.547024952015355e-06, "loss": 0.8666, "step": 464 }, { "epoch": 0.44625719769673705, "grad_norm": 0.0830078125, "learning_rate": 5.5374280230326305e-06, "loss": 0.9601, "step": 465 }, { "epoch": 0.4472168905950096, "grad_norm": 0.09765625, "learning_rate": 5.527831094049905e-06, "loss": 0.9191, "step": 466 }, { "epoch": 0.44817658349328215, "grad_norm": 0.11181640625, "learning_rate": 5.518234165067179e-06, "loss": 1.1148, "step": 467 }, { "epoch": 0.4491362763915547, "grad_norm": 0.0908203125, "learning_rate": 5.5086372360844535e-06, "loss": 0.8935, "step": 468 }, { "epoch": 0.45009596928982726, "grad_norm": 0.1005859375, "learning_rate": 5.499040307101729e-06, "loss": 0.9715, "step": 469 }, { "epoch": 0.4510556621880998, "grad_norm": 0.0927734375, "learning_rate": 5.489443378119003e-06, "loss": 0.9087, "step": 470 }, { "epoch": 0.45201535508637236, "grad_norm": 0.09228515625, "learning_rate": 5.4798464491362765e-06, "loss": 0.912, "step": 471 }, { "epoch": 0.45297504798464494, "grad_norm": 0.1015625, "learning_rate": 5.470249520153551e-06, "loss": 0.9182, "step": 472 }, { "epoch": 0.45393474088291746, "grad_norm": 0.0830078125, "learning_rate": 5.460652591170825e-06, "loss": 0.8728, "step": 473 }, { "epoch": 0.45489443378119004, "grad_norm": 0.0908203125, "learning_rate": 5.4510556621881e-06, "loss": 0.9494, "step": 474 }, { "epoch": 0.45585412667946257, "grad_norm": 0.08544921875, "learning_rate": 5.441458733205375e-06, "loss": 0.8172, "step": 475 }, { "epoch": 0.45681381957773515, "grad_norm": 0.09228515625, "learning_rate": 5.431861804222649e-06, "loss": 0.9106, "step": 476 }, { "epoch": 0.45777351247600767, "grad_norm": 0.1015625, "learning_rate": 5.422264875239923e-06, "loss": 0.958, "step": 477 }, { "epoch": 0.45873320537428025, "grad_norm": 0.08642578125, "learning_rate": 5.4126679462571984e-06, "loss": 0.8861, "step": 478 }, { "epoch": 0.4596928982725528, "grad_norm": 0.0908203125, "learning_rate": 5.403071017274473e-06, "loss": 0.8868, "step": 479 }, { "epoch": 0.46065259117082535, "grad_norm": 0.10986328125, "learning_rate": 5.393474088291747e-06, "loss": 0.954, "step": 480 }, { "epoch": 0.4616122840690979, "grad_norm": 0.08642578125, "learning_rate": 5.383877159309021e-06, "loss": 0.9156, "step": 481 }, { "epoch": 0.46257197696737046, "grad_norm": 0.09228515625, "learning_rate": 5.374280230326297e-06, "loss": 0.8546, "step": 482 }, { "epoch": 0.463531669865643, "grad_norm": 0.10546875, "learning_rate": 5.364683301343571e-06, "loss": 0.8904, "step": 483 }, { "epoch": 0.46449136276391556, "grad_norm": 0.1064453125, "learning_rate": 5.355086372360845e-06, "loss": 0.92, "step": 484 }, { "epoch": 0.4654510556621881, "grad_norm": 0.0888671875, "learning_rate": 5.345489443378119e-06, "loss": 0.9414, "step": 485 }, { "epoch": 0.46641074856046066, "grad_norm": 0.08984375, "learning_rate": 5.335892514395395e-06, "loss": 0.852, "step": 486 }, { "epoch": 0.4673704414587332, "grad_norm": 0.1005859375, "learning_rate": 5.326295585412668e-06, "loss": 0.8742, "step": 487 }, { "epoch": 0.46833013435700577, "grad_norm": 0.1142578125, "learning_rate": 5.3166986564299425e-06, "loss": 0.9099, "step": 488 }, { "epoch": 0.4692898272552783, "grad_norm": 0.08984375, "learning_rate": 5.307101727447217e-06, "loss": 0.8932, "step": 489 }, { "epoch": 0.47024952015355087, "grad_norm": 0.09423828125, "learning_rate": 5.297504798464492e-06, "loss": 0.9031, "step": 490 }, { "epoch": 0.4712092130518234, "grad_norm": 0.1083984375, "learning_rate": 5.287907869481766e-06, "loss": 0.9625, "step": 491 }, { "epoch": 0.472168905950096, "grad_norm": 0.09912109375, "learning_rate": 5.278310940499041e-06, "loss": 0.8983, "step": 492 }, { "epoch": 0.4731285988483685, "grad_norm": 0.09375, "learning_rate": 5.268714011516315e-06, "loss": 0.8959, "step": 493 }, { "epoch": 0.4740882917466411, "grad_norm": 0.0947265625, "learning_rate": 5.25911708253359e-06, "loss": 0.8927, "step": 494 }, { "epoch": 0.4750479846449136, "grad_norm": 0.09326171875, "learning_rate": 5.2495201535508645e-06, "loss": 0.9237, "step": 495 }, { "epoch": 0.4760076775431862, "grad_norm": 0.10791015625, "learning_rate": 5.239923224568139e-06, "loss": 0.8485, "step": 496 }, { "epoch": 0.47696737044145876, "grad_norm": 0.08837890625, "learning_rate": 5.230326295585413e-06, "loss": 0.8559, "step": 497 }, { "epoch": 0.4779270633397313, "grad_norm": 0.1005859375, "learning_rate": 5.220729366602687e-06, "loss": 0.9235, "step": 498 }, { "epoch": 0.47888675623800386, "grad_norm": 0.08740234375, "learning_rate": 5.211132437619963e-06, "loss": 0.8661, "step": 499 }, { "epoch": 0.4798464491362764, "grad_norm": 0.09326171875, "learning_rate": 5.201535508637236e-06, "loss": 0.8836, "step": 500 }, { "epoch": 0.48080614203454897, "grad_norm": 0.0859375, "learning_rate": 5.1919385796545105e-06, "loss": 0.8703, "step": 501 }, { "epoch": 0.4817658349328215, "grad_norm": 0.0830078125, "learning_rate": 5.182341650671785e-06, "loss": 0.8234, "step": 502 }, { "epoch": 0.48272552783109407, "grad_norm": 0.091796875, "learning_rate": 5.17274472168906e-06, "loss": 0.9394, "step": 503 }, { "epoch": 0.4836852207293666, "grad_norm": 0.103515625, "learning_rate": 5.163147792706334e-06, "loss": 0.9818, "step": 504 }, { "epoch": 0.4846449136276392, "grad_norm": 0.08203125, "learning_rate": 5.153550863723609e-06, "loss": 0.8652, "step": 505 }, { "epoch": 0.4856046065259117, "grad_norm": 0.10107421875, "learning_rate": 5.143953934740883e-06, "loss": 0.8937, "step": 506 }, { "epoch": 0.4865642994241843, "grad_norm": 0.099609375, "learning_rate": 5.134357005758158e-06, "loss": 0.8381, "step": 507 }, { "epoch": 0.4875239923224568, "grad_norm": 0.10009765625, "learning_rate": 5.1247600767754325e-06, "loss": 0.9158, "step": 508 }, { "epoch": 0.4884836852207294, "grad_norm": 0.095703125, "learning_rate": 5.115163147792707e-06, "loss": 0.897, "step": 509 }, { "epoch": 0.4894433781190019, "grad_norm": 0.1015625, "learning_rate": 5.105566218809981e-06, "loss": 0.8864, "step": 510 }, { "epoch": 0.4904030710172745, "grad_norm": 0.099609375, "learning_rate": 5.095969289827256e-06, "loss": 0.8694, "step": 511 }, { "epoch": 0.491362763915547, "grad_norm": 0.119140625, "learning_rate": 5.086372360844531e-06, "loss": 1.0052, "step": 512 }, { "epoch": 0.4923224568138196, "grad_norm": 0.083984375, "learning_rate": 5.076775431861805e-06, "loss": 0.8901, "step": 513 }, { "epoch": 0.4932821497120921, "grad_norm": 0.09375, "learning_rate": 5.0671785028790784e-06, "loss": 0.8759, "step": 514 }, { "epoch": 0.4942418426103647, "grad_norm": 0.11865234375, "learning_rate": 5.0575815738963544e-06, "loss": 0.9608, "step": 515 }, { "epoch": 0.4952015355086372, "grad_norm": 0.08056640625, "learning_rate": 5.047984644913628e-06, "loss": 0.8765, "step": 516 }, { "epoch": 0.4961612284069098, "grad_norm": 0.08740234375, "learning_rate": 5.038387715930902e-06, "loss": 0.819, "step": 517 }, { "epoch": 0.4971209213051823, "grad_norm": 0.119140625, "learning_rate": 5.028790786948177e-06, "loss": 0.9235, "step": 518 }, { "epoch": 0.4980806142034549, "grad_norm": 0.09423828125, "learning_rate": 5.019193857965452e-06, "loss": 0.898, "step": 519 }, { "epoch": 0.4990403071017274, "grad_norm": 0.10009765625, "learning_rate": 5.009596928982726e-06, "loss": 0.8986, "step": 520 }, { "epoch": 0.5, "grad_norm": 0.10009765625, "learning_rate": 5e-06, "loss": 0.888, "step": 521 }, { "epoch": 0.5009596928982726, "grad_norm": 0.099609375, "learning_rate": 4.990403071017275e-06, "loss": 0.8961, "step": 522 }, { "epoch": 0.5019193857965452, "grad_norm": 0.10888671875, "learning_rate": 4.980806142034549e-06, "loss": 0.8657, "step": 523 }, { "epoch": 0.5028790786948176, "grad_norm": 0.09326171875, "learning_rate": 4.971209213051823e-06, "loss": 0.9533, "step": 524 }, { "epoch": 0.5038387715930902, "grad_norm": 0.1259765625, "learning_rate": 4.9616122840690986e-06, "loss": 0.8836, "step": 525 }, { "epoch": 0.5047984644913628, "grad_norm": 0.09765625, "learning_rate": 4.952015355086373e-06, "loss": 0.9135, "step": 526 }, { "epoch": 0.5057581573896354, "grad_norm": 0.142578125, "learning_rate": 4.942418426103647e-06, "loss": 0.9559, "step": 527 }, { "epoch": 0.5067178502879078, "grad_norm": 0.1025390625, "learning_rate": 4.9328214971209215e-06, "loss": 0.8754, "step": 528 }, { "epoch": 0.5076775431861804, "grad_norm": 0.09814453125, "learning_rate": 4.923224568138196e-06, "loss": 0.8919, "step": 529 }, { "epoch": 0.508637236084453, "grad_norm": 0.11572265625, "learning_rate": 4.91362763915547e-06, "loss": 1.0779, "step": 530 }, { "epoch": 0.5095969289827256, "grad_norm": 0.08935546875, "learning_rate": 4.904030710172745e-06, "loss": 0.9135, "step": 531 }, { "epoch": 0.510556621880998, "grad_norm": 0.078125, "learning_rate": 4.89443378119002e-06, "loss": 0.8455, "step": 532 }, { "epoch": 0.5115163147792706, "grad_norm": 0.087890625, "learning_rate": 4.884836852207294e-06, "loss": 0.8528, "step": 533 }, { "epoch": 0.5124760076775432, "grad_norm": 0.0908203125, "learning_rate": 4.875239923224568e-06, "loss": 0.932, "step": 534 }, { "epoch": 0.5134357005758158, "grad_norm": 0.09375, "learning_rate": 4.8656429942418435e-06, "loss": 0.8596, "step": 535 }, { "epoch": 0.5143953934740882, "grad_norm": 0.10400390625, "learning_rate": 4.856046065259117e-06, "loss": 0.9044, "step": 536 }, { "epoch": 0.5153550863723608, "grad_norm": 0.1484375, "learning_rate": 4.846449136276392e-06, "loss": 0.8278, "step": 537 }, { "epoch": 0.5163147792706334, "grad_norm": 0.09375, "learning_rate": 4.8368522072936665e-06, "loss": 0.8511, "step": 538 }, { "epoch": 0.517274472168906, "grad_norm": 0.11181640625, "learning_rate": 4.827255278310941e-06, "loss": 0.8579, "step": 539 }, { "epoch": 0.5182341650671785, "grad_norm": 0.09814453125, "learning_rate": 4.817658349328215e-06, "loss": 0.9334, "step": 540 }, { "epoch": 0.519193857965451, "grad_norm": 0.0986328125, "learning_rate": 4.80806142034549e-06, "loss": 0.897, "step": 541 }, { "epoch": 0.5201535508637236, "grad_norm": 0.0927734375, "learning_rate": 4.798464491362765e-06, "loss": 0.8945, "step": 542 }, { "epoch": 0.5211132437619962, "grad_norm": 0.09765625, "learning_rate": 4.788867562380039e-06, "loss": 0.938, "step": 543 }, { "epoch": 0.5220729366602687, "grad_norm": 0.08642578125, "learning_rate": 4.779270633397313e-06, "loss": 0.879, "step": 544 }, { "epoch": 0.5230326295585412, "grad_norm": 0.11328125, "learning_rate": 4.769673704414588e-06, "loss": 0.921, "step": 545 }, { "epoch": 0.5239923224568138, "grad_norm": 0.09033203125, "learning_rate": 4.760076775431862e-06, "loss": 0.9038, "step": 546 }, { "epoch": 0.5249520153550864, "grad_norm": 0.09814453125, "learning_rate": 4.750479846449136e-06, "loss": 0.8522, "step": 547 }, { "epoch": 0.525911708253359, "grad_norm": 0.1064453125, "learning_rate": 4.7408829174664115e-06, "loss": 0.8692, "step": 548 }, { "epoch": 0.5268714011516314, "grad_norm": 0.09375, "learning_rate": 4.731285988483685e-06, "loss": 0.9022, "step": 549 }, { "epoch": 0.527831094049904, "grad_norm": 0.09521484375, "learning_rate": 4.72168905950096e-06, "loss": 0.9225, "step": 550 }, { "epoch": 0.5287907869481766, "grad_norm": 0.09912109375, "learning_rate": 4.7120921305182344e-06, "loss": 0.9088, "step": 551 }, { "epoch": 0.5297504798464492, "grad_norm": 0.0927734375, "learning_rate": 4.702495201535509e-06, "loss": 0.8757, "step": 552 }, { "epoch": 0.5307101727447217, "grad_norm": 0.12890625, "learning_rate": 4.692898272552783e-06, "loss": 0.9915, "step": 553 }, { "epoch": 0.5316698656429942, "grad_norm": 0.1376953125, "learning_rate": 4.683301343570058e-06, "loss": 0.9778, "step": 554 }, { "epoch": 0.5326295585412668, "grad_norm": 0.1064453125, "learning_rate": 4.673704414587333e-06, "loss": 0.9743, "step": 555 }, { "epoch": 0.5335892514395394, "grad_norm": 0.10400390625, "learning_rate": 4.664107485604607e-06, "loss": 0.9756, "step": 556 }, { "epoch": 0.5345489443378119, "grad_norm": 0.146484375, "learning_rate": 4.654510556621881e-06, "loss": 0.9832, "step": 557 }, { "epoch": 0.5355086372360844, "grad_norm": 0.1064453125, "learning_rate": 4.644913627639156e-06, "loss": 0.9348, "step": 558 }, { "epoch": 0.536468330134357, "grad_norm": 0.09716796875, "learning_rate": 4.63531669865643e-06, "loss": 0.8675, "step": 559 }, { "epoch": 0.5374280230326296, "grad_norm": 0.099609375, "learning_rate": 4.625719769673705e-06, "loss": 0.9037, "step": 560 }, { "epoch": 0.5383877159309021, "grad_norm": 0.0927734375, "learning_rate": 4.616122840690979e-06, "loss": 0.9621, "step": 561 }, { "epoch": 0.5393474088291746, "grad_norm": 0.10400390625, "learning_rate": 4.606525911708254e-06, "loss": 0.9018, "step": 562 }, { "epoch": 0.5403071017274472, "grad_norm": 0.09912109375, "learning_rate": 4.596928982725528e-06, "loss": 0.8998, "step": 563 }, { "epoch": 0.5412667946257198, "grad_norm": 0.103515625, "learning_rate": 4.587332053742803e-06, "loss": 0.8812, "step": 564 }, { "epoch": 0.5422264875239923, "grad_norm": 0.08935546875, "learning_rate": 4.577735124760077e-06, "loss": 0.8627, "step": 565 }, { "epoch": 0.5431861804222649, "grad_norm": 0.0986328125, "learning_rate": 4.568138195777352e-06, "loss": 0.9064, "step": 566 }, { "epoch": 0.5441458733205374, "grad_norm": 0.09912109375, "learning_rate": 4.558541266794626e-06, "loss": 0.85, "step": 567 }, { "epoch": 0.54510556621881, "grad_norm": 0.10498046875, "learning_rate": 4.5489443378119005e-06, "loss": 0.9558, "step": 568 }, { "epoch": 0.5460652591170825, "grad_norm": 0.10302734375, "learning_rate": 4.539347408829175e-06, "loss": 0.9183, "step": 569 }, { "epoch": 0.5470249520153551, "grad_norm": 0.091796875, "learning_rate": 4.52975047984645e-06, "loss": 0.9178, "step": 570 }, { "epoch": 0.5479846449136276, "grad_norm": 0.08740234375, "learning_rate": 4.520153550863724e-06, "loss": 0.9022, "step": 571 }, { "epoch": 0.5489443378119002, "grad_norm": 0.0927734375, "learning_rate": 4.510556621880998e-06, "loss": 0.858, "step": 572 }, { "epoch": 0.5499040307101728, "grad_norm": 0.0908203125, "learning_rate": 4.500959692898273e-06, "loss": 0.8855, "step": 573 }, { "epoch": 0.5508637236084453, "grad_norm": 0.1181640625, "learning_rate": 4.491362763915547e-06, "loss": 1.0215, "step": 574 }, { "epoch": 0.5518234165067178, "grad_norm": 0.1689453125, "learning_rate": 4.481765834932822e-06, "loss": 1.0886, "step": 575 }, { "epoch": 0.5527831094049904, "grad_norm": 0.10498046875, "learning_rate": 4.472168905950096e-06, "loss": 0.9371, "step": 576 }, { "epoch": 0.553742802303263, "grad_norm": 0.09765625, "learning_rate": 4.462571976967371e-06, "loss": 0.9896, "step": 577 }, { "epoch": 0.5547024952015355, "grad_norm": 0.1435546875, "learning_rate": 4.4529750479846455e-06, "loss": 0.9882, "step": 578 }, { "epoch": 0.555662188099808, "grad_norm": 0.08984375, "learning_rate": 4.44337811900192e-06, "loss": 0.839, "step": 579 }, { "epoch": 0.5566218809980806, "grad_norm": 0.0908203125, "learning_rate": 4.433781190019194e-06, "loss": 0.8676, "step": 580 }, { "epoch": 0.5575815738963532, "grad_norm": 0.1005859375, "learning_rate": 4.4241842610364685e-06, "loss": 0.9514, "step": 581 }, { "epoch": 0.5585412667946257, "grad_norm": 0.12158203125, "learning_rate": 4.414587332053743e-06, "loss": 0.9222, "step": 582 }, { "epoch": 0.5595009596928983, "grad_norm": 0.087890625, "learning_rate": 4.404990403071018e-06, "loss": 0.8855, "step": 583 }, { "epoch": 0.5604606525911708, "grad_norm": 0.09912109375, "learning_rate": 4.395393474088292e-06, "loss": 0.8979, "step": 584 }, { "epoch": 0.5614203454894434, "grad_norm": 0.09765625, "learning_rate": 4.385796545105567e-06, "loss": 0.8857, "step": 585 }, { "epoch": 0.5623800383877159, "grad_norm": 0.091796875, "learning_rate": 4.376199616122841e-06, "loss": 0.8976, "step": 586 }, { "epoch": 0.5633397312859885, "grad_norm": 0.1025390625, "learning_rate": 4.366602687140115e-06, "loss": 0.8973, "step": 587 }, { "epoch": 0.564299424184261, "grad_norm": 0.10205078125, "learning_rate": 4.35700575815739e-06, "loss": 0.9081, "step": 588 }, { "epoch": 0.5652591170825336, "grad_norm": 0.0927734375, "learning_rate": 4.347408829174665e-06, "loss": 0.9515, "step": 589 }, { "epoch": 0.5662188099808061, "grad_norm": 0.1083984375, "learning_rate": 4.337811900191939e-06, "loss": 0.9527, "step": 590 }, { "epoch": 0.5671785028790787, "grad_norm": 0.1650390625, "learning_rate": 4.3282149712092134e-06, "loss": 1.024, "step": 591 }, { "epoch": 0.5681381957773513, "grad_norm": 0.10205078125, "learning_rate": 4.318618042226488e-06, "loss": 0.9031, "step": 592 }, { "epoch": 0.5690978886756238, "grad_norm": 0.1455078125, "learning_rate": 4.309021113243763e-06, "loss": 0.9823, "step": 593 }, { "epoch": 0.5700575815738963, "grad_norm": 0.091796875, "learning_rate": 4.299424184261036e-06, "loss": 0.8912, "step": 594 }, { "epoch": 0.5710172744721689, "grad_norm": 0.1025390625, "learning_rate": 4.289827255278312e-06, "loss": 0.8746, "step": 595 }, { "epoch": 0.5719769673704415, "grad_norm": 0.08935546875, "learning_rate": 4.280230326295586e-06, "loss": 0.8366, "step": 596 }, { "epoch": 0.572936660268714, "grad_norm": 0.107421875, "learning_rate": 4.27063339731286e-06, "loss": 0.9169, "step": 597 }, { "epoch": 0.5738963531669866, "grad_norm": 0.091796875, "learning_rate": 4.2610364683301346e-06, "loss": 0.8725, "step": 598 }, { "epoch": 0.5748560460652591, "grad_norm": 0.09765625, "learning_rate": 4.251439539347409e-06, "loss": 0.8822, "step": 599 }, { "epoch": 0.5758157389635317, "grad_norm": 0.09716796875, "learning_rate": 4.241842610364684e-06, "loss": 0.8658, "step": 600 }, { "epoch": 0.5767754318618042, "grad_norm": 0.11181640625, "learning_rate": 4.2322456813819576e-06, "loss": 0.9315, "step": 601 }, { "epoch": 0.5777351247600768, "grad_norm": 0.09765625, "learning_rate": 4.222648752399233e-06, "loss": 0.915, "step": 602 }, { "epoch": 0.5786948176583493, "grad_norm": 0.09521484375, "learning_rate": 4.213051823416507e-06, "loss": 0.8608, "step": 603 }, { "epoch": 0.5796545105566219, "grad_norm": 0.1416015625, "learning_rate": 4.203454894433781e-06, "loss": 0.9974, "step": 604 }, { "epoch": 0.5806142034548945, "grad_norm": 0.10302734375, "learning_rate": 4.193857965451056e-06, "loss": 0.8526, "step": 605 }, { "epoch": 0.581573896353167, "grad_norm": 0.09521484375, "learning_rate": 4.184261036468331e-06, "loss": 0.8498, "step": 606 }, { "epoch": 0.5825335892514395, "grad_norm": 0.16796875, "learning_rate": 4.174664107485605e-06, "loss": 1.011, "step": 607 }, { "epoch": 0.5834932821497121, "grad_norm": 0.0869140625, "learning_rate": 4.1650671785028795e-06, "loss": 0.8683, "step": 608 }, { "epoch": 0.5844529750479847, "grad_norm": 0.0869140625, "learning_rate": 4.155470249520154e-06, "loss": 0.8471, "step": 609 }, { "epoch": 0.5854126679462572, "grad_norm": 0.130859375, "learning_rate": 4.145873320537428e-06, "loss": 0.9454, "step": 610 }, { "epoch": 0.5863723608445297, "grad_norm": 0.09912109375, "learning_rate": 4.1362763915547025e-06, "loss": 0.9189, "step": 611 }, { "epoch": 0.5873320537428023, "grad_norm": 0.0908203125, "learning_rate": 4.126679462571978e-06, "loss": 0.8792, "step": 612 }, { "epoch": 0.5882917466410749, "grad_norm": 0.1337890625, "learning_rate": 4.117082533589252e-06, "loss": 1.0256, "step": 613 }, { "epoch": 0.5892514395393474, "grad_norm": 0.1064453125, "learning_rate": 4.107485604606526e-06, "loss": 0.8726, "step": 614 }, { "epoch": 0.5902111324376199, "grad_norm": 0.080078125, "learning_rate": 4.097888675623801e-06, "loss": 0.8566, "step": 615 }, { "epoch": 0.5911708253358925, "grad_norm": 0.099609375, "learning_rate": 4.088291746641076e-06, "loss": 0.8521, "step": 616 }, { "epoch": 0.5921305182341651, "grad_norm": 0.08935546875, "learning_rate": 4.078694817658349e-06, "loss": 0.8877, "step": 617 }, { "epoch": 0.5930902111324377, "grad_norm": 0.0830078125, "learning_rate": 4.0690978886756245e-06, "loss": 0.8607, "step": 618 }, { "epoch": 0.5940499040307101, "grad_norm": 0.166015625, "learning_rate": 4.059500959692899e-06, "loss": 1.0081, "step": 619 }, { "epoch": 0.5950095969289827, "grad_norm": 0.08984375, "learning_rate": 4.049904030710173e-06, "loss": 0.8838, "step": 620 }, { "epoch": 0.5959692898272553, "grad_norm": 0.1416015625, "learning_rate": 4.0403071017274475e-06, "loss": 0.9923, "step": 621 }, { "epoch": 0.5969289827255279, "grad_norm": 0.08740234375, "learning_rate": 4.030710172744722e-06, "loss": 0.8822, "step": 622 }, { "epoch": 0.5978886756238004, "grad_norm": 0.0947265625, "learning_rate": 4.021113243761996e-06, "loss": 0.8951, "step": 623 }, { "epoch": 0.5988483685220729, "grad_norm": 0.09228515625, "learning_rate": 4.0115163147792705e-06, "loss": 0.8574, "step": 624 }, { "epoch": 0.5998080614203455, "grad_norm": 0.0947265625, "learning_rate": 4.001919385796546e-06, "loss": 0.9205, "step": 625 }, { "epoch": 0.6007677543186181, "grad_norm": 0.10986328125, "learning_rate": 3.99232245681382e-06, "loss": 0.8943, "step": 626 }, { "epoch": 0.6017274472168906, "grad_norm": 0.09716796875, "learning_rate": 3.982725527831094e-06, "loss": 0.8803, "step": 627 }, { "epoch": 0.6026871401151631, "grad_norm": 0.1181640625, "learning_rate": 3.973128598848369e-06, "loss": 0.9062, "step": 628 }, { "epoch": 0.6036468330134357, "grad_norm": 0.08447265625, "learning_rate": 3.963531669865644e-06, "loss": 0.8578, "step": 629 }, { "epoch": 0.6046065259117083, "grad_norm": 0.09375, "learning_rate": 3.953934740882917e-06, "loss": 0.8856, "step": 630 }, { "epoch": 0.6055662188099808, "grad_norm": 0.087890625, "learning_rate": 3.9443378119001924e-06, "loss": 0.8826, "step": 631 }, { "epoch": 0.6065259117082533, "grad_norm": 0.09326171875, "learning_rate": 3.934740882917467e-06, "loss": 0.8445, "step": 632 }, { "epoch": 0.6074856046065259, "grad_norm": 0.087890625, "learning_rate": 3.925143953934741e-06, "loss": 0.8739, "step": 633 }, { "epoch": 0.6084452975047985, "grad_norm": 0.11181640625, "learning_rate": 3.915547024952015e-06, "loss": 0.9497, "step": 634 }, { "epoch": 0.6094049904030711, "grad_norm": 0.08154296875, "learning_rate": 3.905950095969291e-06, "loss": 0.8306, "step": 635 }, { "epoch": 0.6103646833013435, "grad_norm": 0.130859375, "learning_rate": 3.896353166986565e-06, "loss": 0.9456, "step": 636 }, { "epoch": 0.6113243761996161, "grad_norm": 0.11181640625, "learning_rate": 3.886756238003839e-06, "loss": 0.9411, "step": 637 }, { "epoch": 0.6122840690978887, "grad_norm": 0.08984375, "learning_rate": 3.8771593090211136e-06, "loss": 0.9256, "step": 638 }, { "epoch": 0.6132437619961613, "grad_norm": 0.08935546875, "learning_rate": 3.867562380038388e-06, "loss": 0.8409, "step": 639 }, { "epoch": 0.6142034548944337, "grad_norm": 0.09716796875, "learning_rate": 3.857965451055662e-06, "loss": 0.9718, "step": 640 }, { "epoch": 0.6151631477927063, "grad_norm": 0.0869140625, "learning_rate": 3.848368522072937e-06, "loss": 0.9014, "step": 641 }, { "epoch": 0.6161228406909789, "grad_norm": 0.0966796875, "learning_rate": 3.838771593090212e-06, "loss": 0.9303, "step": 642 }, { "epoch": 0.6170825335892515, "grad_norm": 0.10107421875, "learning_rate": 3.829174664107486e-06, "loss": 0.918, "step": 643 }, { "epoch": 0.6180422264875239, "grad_norm": 0.138671875, "learning_rate": 3.81957773512476e-06, "loss": 0.8488, "step": 644 }, { "epoch": 0.6190019193857965, "grad_norm": 0.0849609375, "learning_rate": 3.809980806142035e-06, "loss": 0.892, "step": 645 }, { "epoch": 0.6199616122840691, "grad_norm": 0.10595703125, "learning_rate": 3.8003838771593095e-06, "loss": 0.865, "step": 646 }, { "epoch": 0.6209213051823417, "grad_norm": 0.087890625, "learning_rate": 3.7907869481765834e-06, "loss": 0.8873, "step": 647 }, { "epoch": 0.6218809980806143, "grad_norm": 0.09619140625, "learning_rate": 3.781190019193858e-06, "loss": 0.8258, "step": 648 }, { "epoch": 0.6228406909788867, "grad_norm": 0.08984375, "learning_rate": 3.7715930902111324e-06, "loss": 0.8526, "step": 649 }, { "epoch": 0.6238003838771593, "grad_norm": 0.09716796875, "learning_rate": 3.761996161228407e-06, "loss": 0.8948, "step": 650 }, { "epoch": 0.6247600767754319, "grad_norm": 0.0966796875, "learning_rate": 3.7523992322456815e-06, "loss": 0.9332, "step": 651 }, { "epoch": 0.6257197696737045, "grad_norm": 0.10107421875, "learning_rate": 3.7428023032629563e-06, "loss": 0.8915, "step": 652 }, { "epoch": 0.6266794625719769, "grad_norm": 0.09423828125, "learning_rate": 3.7332053742802306e-06, "loss": 0.8898, "step": 653 }, { "epoch": 0.6276391554702495, "grad_norm": 0.09765625, "learning_rate": 3.7236084452975053e-06, "loss": 0.9066, "step": 654 }, { "epoch": 0.6285988483685221, "grad_norm": 0.08837890625, "learning_rate": 3.7140115163147792e-06, "loss": 0.8427, "step": 655 }, { "epoch": 0.6295585412667947, "grad_norm": 0.10302734375, "learning_rate": 3.704414587332054e-06, "loss": 0.8633, "step": 656 }, { "epoch": 0.6305182341650671, "grad_norm": 0.115234375, "learning_rate": 3.6948176583493283e-06, "loss": 0.9561, "step": 657 }, { "epoch": 0.6314779270633397, "grad_norm": 0.1083984375, "learning_rate": 3.685220729366603e-06, "loss": 0.8768, "step": 658 }, { "epoch": 0.6324376199616123, "grad_norm": 0.0869140625, "learning_rate": 3.6756238003838774e-06, "loss": 0.8322, "step": 659 }, { "epoch": 0.6333973128598849, "grad_norm": 0.0986328125, "learning_rate": 3.666026871401152e-06, "loss": 0.9367, "step": 660 }, { "epoch": 0.6343570057581573, "grad_norm": 0.10302734375, "learning_rate": 3.6564299424184265e-06, "loss": 0.9112, "step": 661 }, { "epoch": 0.6353166986564299, "grad_norm": 0.10888671875, "learning_rate": 3.6468330134357012e-06, "loss": 0.9364, "step": 662 }, { "epoch": 0.6362763915547025, "grad_norm": 0.1083984375, "learning_rate": 3.637236084452975e-06, "loss": 0.8825, "step": 663 }, { "epoch": 0.6372360844529751, "grad_norm": 0.10693359375, "learning_rate": 3.62763915547025e-06, "loss": 0.927, "step": 664 }, { "epoch": 0.6381957773512476, "grad_norm": 0.08447265625, "learning_rate": 3.618042226487524e-06, "loss": 0.9676, "step": 665 }, { "epoch": 0.6391554702495201, "grad_norm": 0.103515625, "learning_rate": 3.608445297504799e-06, "loss": 0.7565, "step": 666 }, { "epoch": 0.6401151631477927, "grad_norm": 0.08154296875, "learning_rate": 3.5988483685220733e-06, "loss": 0.8226, "step": 667 }, { "epoch": 0.6410748560460653, "grad_norm": 0.10205078125, "learning_rate": 3.589251439539348e-06, "loss": 0.9409, "step": 668 }, { "epoch": 0.6420345489443378, "grad_norm": 0.1015625, "learning_rate": 3.5796545105566224e-06, "loss": 0.8866, "step": 669 }, { "epoch": 0.6429942418426103, "grad_norm": 0.0859375, "learning_rate": 3.5700575815738963e-06, "loss": 0.8802, "step": 670 }, { "epoch": 0.6439539347408829, "grad_norm": 0.0849609375, "learning_rate": 3.560460652591171e-06, "loss": 0.8443, "step": 671 }, { "epoch": 0.6449136276391555, "grad_norm": 0.10302734375, "learning_rate": 3.5508637236084453e-06, "loss": 0.9877, "step": 672 }, { "epoch": 0.6458733205374281, "grad_norm": 0.1005859375, "learning_rate": 3.54126679462572e-06, "loss": 0.8619, "step": 673 }, { "epoch": 0.6468330134357005, "grad_norm": 0.10205078125, "learning_rate": 3.5316698656429944e-06, "loss": 0.9202, "step": 674 }, { "epoch": 0.6477927063339731, "grad_norm": 0.08935546875, "learning_rate": 3.522072936660269e-06, "loss": 0.8944, "step": 675 }, { "epoch": 0.6487523992322457, "grad_norm": 0.0849609375, "learning_rate": 3.512476007677543e-06, "loss": 0.8625, "step": 676 }, { "epoch": 0.6497120921305183, "grad_norm": 0.10498046875, "learning_rate": 3.502879078694818e-06, "loss": 0.8948, "step": 677 }, { "epoch": 0.6506717850287908, "grad_norm": 0.1171875, "learning_rate": 3.493282149712092e-06, "loss": 0.9505, "step": 678 }, { "epoch": 0.6516314779270633, "grad_norm": 0.08935546875, "learning_rate": 3.483685220729367e-06, "loss": 0.8514, "step": 679 }, { "epoch": 0.6525911708253359, "grad_norm": 0.09814453125, "learning_rate": 3.4740882917466412e-06, "loss": 0.9246, "step": 680 }, { "epoch": 0.6535508637236085, "grad_norm": 0.09326171875, "learning_rate": 3.464491362763916e-06, "loss": 0.8808, "step": 681 }, { "epoch": 0.654510556621881, "grad_norm": 0.1220703125, "learning_rate": 3.4548944337811903e-06, "loss": 0.9993, "step": 682 }, { "epoch": 0.6554702495201535, "grad_norm": 0.08203125, "learning_rate": 3.445297504798465e-06, "loss": 0.8682, "step": 683 }, { "epoch": 0.6564299424184261, "grad_norm": 0.0947265625, "learning_rate": 3.435700575815739e-06, "loss": 0.8945, "step": 684 }, { "epoch": 0.6573896353166987, "grad_norm": 0.08447265625, "learning_rate": 3.4261036468330137e-06, "loss": 0.8457, "step": 685 }, { "epoch": 0.6583493282149712, "grad_norm": 0.09375, "learning_rate": 3.416506717850288e-06, "loss": 0.8959, "step": 686 }, { "epoch": 0.6593090211132437, "grad_norm": 0.08740234375, "learning_rate": 3.4069097888675628e-06, "loss": 0.8913, "step": 687 }, { "epoch": 0.6602687140115163, "grad_norm": 0.09765625, "learning_rate": 3.397312859884837e-06, "loss": 0.9114, "step": 688 }, { "epoch": 0.6612284069097889, "grad_norm": 0.0869140625, "learning_rate": 3.387715930902112e-06, "loss": 0.8474, "step": 689 }, { "epoch": 0.6621880998080614, "grad_norm": 0.0947265625, "learning_rate": 3.378119001919386e-06, "loss": 0.877, "step": 690 }, { "epoch": 0.663147792706334, "grad_norm": 0.09375, "learning_rate": 3.368522072936661e-06, "loss": 0.878, "step": 691 }, { "epoch": 0.6641074856046065, "grad_norm": 0.1435546875, "learning_rate": 3.358925143953935e-06, "loss": 1.0548, "step": 692 }, { "epoch": 0.6650671785028791, "grad_norm": 0.08349609375, "learning_rate": 3.3493282149712096e-06, "loss": 0.8348, "step": 693 }, { "epoch": 0.6660268714011516, "grad_norm": 0.103515625, "learning_rate": 3.339731285988484e-06, "loss": 0.9428, "step": 694 }, { "epoch": 0.6669865642994242, "grad_norm": 0.10400390625, "learning_rate": 3.3301343570057582e-06, "loss": 0.8574, "step": 695 }, { "epoch": 0.6679462571976967, "grad_norm": 0.1474609375, "learning_rate": 3.320537428023033e-06, "loss": 1.0041, "step": 696 }, { "epoch": 0.6689059500959693, "grad_norm": 0.083984375, "learning_rate": 3.3109404990403073e-06, "loss": 0.9344, "step": 697 }, { "epoch": 0.6698656429942419, "grad_norm": 0.0927734375, "learning_rate": 3.301343570057582e-06, "loss": 0.8579, "step": 698 }, { "epoch": 0.6708253358925144, "grad_norm": 0.09130859375, "learning_rate": 3.291746641074856e-06, "loss": 0.8861, "step": 699 }, { "epoch": 0.6717850287907869, "grad_norm": 0.08447265625, "learning_rate": 3.2821497120921307e-06, "loss": 0.841, "step": 700 }, { "epoch": 0.6727447216890595, "grad_norm": 0.1142578125, "learning_rate": 3.272552783109405e-06, "loss": 0.9662, "step": 701 }, { "epoch": 0.6737044145873321, "grad_norm": 0.0986328125, "learning_rate": 3.26295585412668e-06, "loss": 0.9112, "step": 702 }, { "epoch": 0.6746641074856046, "grad_norm": 0.1298828125, "learning_rate": 3.253358925143954e-06, "loss": 1.0551, "step": 703 }, { "epoch": 0.6756238003838771, "grad_norm": 0.1171875, "learning_rate": 3.243761996161229e-06, "loss": 0.8602, "step": 704 }, { "epoch": 0.6765834932821497, "grad_norm": 0.099609375, "learning_rate": 3.2341650671785028e-06, "loss": 0.9373, "step": 705 }, { "epoch": 0.6775431861804223, "grad_norm": 0.1357421875, "learning_rate": 3.224568138195778e-06, "loss": 0.9555, "step": 706 }, { "epoch": 0.6785028790786948, "grad_norm": 0.142578125, "learning_rate": 3.214971209213052e-06, "loss": 0.97, "step": 707 }, { "epoch": 0.6794625719769674, "grad_norm": 0.0859375, "learning_rate": 3.2053742802303266e-06, "loss": 0.833, "step": 708 }, { "epoch": 0.6804222648752399, "grad_norm": 0.107421875, "learning_rate": 3.195777351247601e-06, "loss": 0.8036, "step": 709 }, { "epoch": 0.6813819577735125, "grad_norm": 0.11328125, "learning_rate": 3.1861804222648757e-06, "loss": 0.9721, "step": 710 }, { "epoch": 0.682341650671785, "grad_norm": 0.0869140625, "learning_rate": 3.17658349328215e-06, "loss": 0.8539, "step": 711 }, { "epoch": 0.6833013435700576, "grad_norm": 0.0869140625, "learning_rate": 3.1669865642994248e-06, "loss": 0.8504, "step": 712 }, { "epoch": 0.6842610364683301, "grad_norm": 0.1044921875, "learning_rate": 3.1573896353166987e-06, "loss": 0.878, "step": 713 }, { "epoch": 0.6852207293666027, "grad_norm": 0.1025390625, "learning_rate": 3.1477927063339734e-06, "loss": 0.8945, "step": 714 }, { "epoch": 0.6861804222648752, "grad_norm": 0.099609375, "learning_rate": 3.1381957773512477e-06, "loss": 0.9088, "step": 715 }, { "epoch": 0.6871401151631478, "grad_norm": 0.1005859375, "learning_rate": 3.1285988483685225e-06, "loss": 0.9088, "step": 716 }, { "epoch": 0.6880998080614203, "grad_norm": 0.09619140625, "learning_rate": 3.119001919385797e-06, "loss": 0.8658, "step": 717 }, { "epoch": 0.6890595009596929, "grad_norm": 0.11865234375, "learning_rate": 3.1094049904030716e-06, "loss": 0.8723, "step": 718 }, { "epoch": 0.6900191938579654, "grad_norm": 0.154296875, "learning_rate": 3.099808061420346e-06, "loss": 0.9998, "step": 719 }, { "epoch": 0.690978886756238, "grad_norm": 0.09765625, "learning_rate": 3.09021113243762e-06, "loss": 0.8432, "step": 720 }, { "epoch": 0.6919385796545106, "grad_norm": 0.1005859375, "learning_rate": 3.0806142034548945e-06, "loss": 0.8783, "step": 721 }, { "epoch": 0.6928982725527831, "grad_norm": 0.09423828125, "learning_rate": 3.071017274472169e-06, "loss": 0.8529, "step": 722 }, { "epoch": 0.6938579654510557, "grad_norm": 0.09716796875, "learning_rate": 3.0614203454894436e-06, "loss": 0.8756, "step": 723 }, { "epoch": 0.6948176583493282, "grad_norm": 0.109375, "learning_rate": 3.051823416506718e-06, "loss": 0.9314, "step": 724 }, { "epoch": 0.6957773512476008, "grad_norm": 0.09716796875, "learning_rate": 3.0422264875239927e-06, "loss": 0.8719, "step": 725 }, { "epoch": 0.6967370441458733, "grad_norm": 0.09716796875, "learning_rate": 3.032629558541267e-06, "loss": 0.881, "step": 726 }, { "epoch": 0.6976967370441459, "grad_norm": 0.1552734375, "learning_rate": 3.0230326295585418e-06, "loss": 0.97, "step": 727 }, { "epoch": 0.6986564299424184, "grad_norm": 0.1015625, "learning_rate": 3.0134357005758157e-06, "loss": 0.8983, "step": 728 }, { "epoch": 0.699616122840691, "grad_norm": 0.08740234375, "learning_rate": 3.0038387715930904e-06, "loss": 0.8501, "step": 729 }, { "epoch": 0.7005758157389635, "grad_norm": 0.103515625, "learning_rate": 2.9942418426103648e-06, "loss": 0.8754, "step": 730 }, { "epoch": 0.7015355086372361, "grad_norm": 0.1083984375, "learning_rate": 2.9846449136276395e-06, "loss": 0.9421, "step": 731 }, { "epoch": 0.7024952015355086, "grad_norm": 0.1328125, "learning_rate": 2.975047984644914e-06, "loss": 0.9579, "step": 732 }, { "epoch": 0.7034548944337812, "grad_norm": 0.095703125, "learning_rate": 2.9654510556621886e-06, "loss": 0.8518, "step": 733 }, { "epoch": 0.7044145873320538, "grad_norm": 0.11962890625, "learning_rate": 2.9558541266794625e-06, "loss": 0.9811, "step": 734 }, { "epoch": 0.7053742802303263, "grad_norm": 0.09326171875, "learning_rate": 2.9462571976967377e-06, "loss": 0.9039, "step": 735 }, { "epoch": 0.7063339731285988, "grad_norm": 0.11083984375, "learning_rate": 2.9366602687140116e-06, "loss": 0.914, "step": 736 }, { "epoch": 0.7072936660268714, "grad_norm": 0.08935546875, "learning_rate": 2.9270633397312863e-06, "loss": 0.8289, "step": 737 }, { "epoch": 0.708253358925144, "grad_norm": 0.08251953125, "learning_rate": 2.9174664107485606e-06, "loss": 0.8639, "step": 738 }, { "epoch": 0.7092130518234165, "grad_norm": 0.09423828125, "learning_rate": 2.9078694817658354e-06, "loss": 0.9277, "step": 739 }, { "epoch": 0.710172744721689, "grad_norm": 0.08984375, "learning_rate": 2.8982725527831097e-06, "loss": 0.8812, "step": 740 }, { "epoch": 0.7111324376199616, "grad_norm": 0.09765625, "learning_rate": 2.8886756238003845e-06, "loss": 0.9588, "step": 741 }, { "epoch": 0.7120921305182342, "grad_norm": 0.1103515625, "learning_rate": 2.8790786948176584e-06, "loss": 0.9299, "step": 742 }, { "epoch": 0.7130518234165067, "grad_norm": 0.095703125, "learning_rate": 2.869481765834933e-06, "loss": 0.9102, "step": 743 }, { "epoch": 0.7140115163147792, "grad_norm": 0.0986328125, "learning_rate": 2.8598848368522074e-06, "loss": 0.9518, "step": 744 }, { "epoch": 0.7149712092130518, "grad_norm": 0.08349609375, "learning_rate": 2.8502879078694818e-06, "loss": 0.8438, "step": 745 }, { "epoch": 0.7159309021113244, "grad_norm": 0.11865234375, "learning_rate": 2.8406909788867565e-06, "loss": 0.9486, "step": 746 }, { "epoch": 0.716890595009597, "grad_norm": 0.091796875, "learning_rate": 2.831094049904031e-06, "loss": 0.9467, "step": 747 }, { "epoch": 0.7178502879078695, "grad_norm": 0.099609375, "learning_rate": 2.8214971209213056e-06, "loss": 0.897, "step": 748 }, { "epoch": 0.718809980806142, "grad_norm": 0.14453125, "learning_rate": 2.8119001919385795e-06, "loss": 0.9883, "step": 749 }, { "epoch": 0.7197696737044146, "grad_norm": 0.08544921875, "learning_rate": 2.8023032629558543e-06, "loss": 0.8568, "step": 750 }, { "epoch": 0.7207293666026872, "grad_norm": 0.10107421875, "learning_rate": 2.7927063339731286e-06, "loss": 0.9299, "step": 751 }, { "epoch": 0.7216890595009597, "grad_norm": 0.1005859375, "learning_rate": 2.7831094049904033e-06, "loss": 0.9514, "step": 752 }, { "epoch": 0.7226487523992322, "grad_norm": 0.10302734375, "learning_rate": 2.7735124760076777e-06, "loss": 0.8542, "step": 753 }, { "epoch": 0.7236084452975048, "grad_norm": 0.08642578125, "learning_rate": 2.7639155470249524e-06, "loss": 0.9055, "step": 754 }, { "epoch": 0.7245681381957774, "grad_norm": 0.1015625, "learning_rate": 2.7543186180422267e-06, "loss": 0.975, "step": 755 }, { "epoch": 0.72552783109405, "grad_norm": 0.11572265625, "learning_rate": 2.7447216890595015e-06, "loss": 0.9118, "step": 756 }, { "epoch": 0.7264875239923224, "grad_norm": 0.09619140625, "learning_rate": 2.7351247600767754e-06, "loss": 0.8825, "step": 757 }, { "epoch": 0.727447216890595, "grad_norm": 0.16015625, "learning_rate": 2.72552783109405e-06, "loss": 0.8799, "step": 758 }, { "epoch": 0.7284069097888676, "grad_norm": 0.09033203125, "learning_rate": 2.7159309021113245e-06, "loss": 0.8458, "step": 759 }, { "epoch": 0.7293666026871402, "grad_norm": 0.10888671875, "learning_rate": 2.7063339731285992e-06, "loss": 0.9211, "step": 760 }, { "epoch": 0.7303262955854126, "grad_norm": 0.0927734375, "learning_rate": 2.6967370441458735e-06, "loss": 0.9039, "step": 761 }, { "epoch": 0.7312859884836852, "grad_norm": 0.0966796875, "learning_rate": 2.6871401151631483e-06, "loss": 0.8405, "step": 762 }, { "epoch": 0.7322456813819578, "grad_norm": 0.10498046875, "learning_rate": 2.6775431861804226e-06, "loss": 0.9307, "step": 763 }, { "epoch": 0.7332053742802304, "grad_norm": 0.08984375, "learning_rate": 2.6679462571976974e-06, "loss": 0.8627, "step": 764 }, { "epoch": 0.7341650671785028, "grad_norm": 0.10009765625, "learning_rate": 2.6583493282149713e-06, "loss": 0.9403, "step": 765 }, { "epoch": 0.7351247600767754, "grad_norm": 0.08349609375, "learning_rate": 2.648752399232246e-06, "loss": 0.8884, "step": 766 }, { "epoch": 0.736084452975048, "grad_norm": 0.203125, "learning_rate": 2.6391554702495203e-06, "loss": 1.0263, "step": 767 }, { "epoch": 0.7370441458733206, "grad_norm": 0.169921875, "learning_rate": 2.629558541266795e-06, "loss": 0.9245, "step": 768 }, { "epoch": 0.738003838771593, "grad_norm": 0.09765625, "learning_rate": 2.6199616122840694e-06, "loss": 0.91, "step": 769 }, { "epoch": 0.7389635316698656, "grad_norm": 0.1259765625, "learning_rate": 2.6103646833013433e-06, "loss": 0.9534, "step": 770 }, { "epoch": 0.7399232245681382, "grad_norm": 0.12353515625, "learning_rate": 2.600767754318618e-06, "loss": 0.8804, "step": 771 }, { "epoch": 0.7408829174664108, "grad_norm": 0.1005859375, "learning_rate": 2.5911708253358924e-06, "loss": 0.9345, "step": 772 }, { "epoch": 0.7418426103646834, "grad_norm": 0.10400390625, "learning_rate": 2.581573896353167e-06, "loss": 0.8712, "step": 773 }, { "epoch": 0.7428023032629558, "grad_norm": 0.09423828125, "learning_rate": 2.5719769673704415e-06, "loss": 0.9098, "step": 774 }, { "epoch": 0.7437619961612284, "grad_norm": 0.08935546875, "learning_rate": 2.5623800383877162e-06, "loss": 0.9252, "step": 775 }, { "epoch": 0.744721689059501, "grad_norm": 0.10546875, "learning_rate": 2.5527831094049906e-06, "loss": 0.8881, "step": 776 }, { "epoch": 0.7456813819577736, "grad_norm": 0.095703125, "learning_rate": 2.5431861804222653e-06, "loss": 0.9383, "step": 777 }, { "epoch": 0.746641074856046, "grad_norm": 0.11474609375, "learning_rate": 2.5335892514395392e-06, "loss": 0.8981, "step": 778 }, { "epoch": 0.7476007677543186, "grad_norm": 0.107421875, "learning_rate": 2.523992322456814e-06, "loss": 0.9791, "step": 779 }, { "epoch": 0.7485604606525912, "grad_norm": 0.078125, "learning_rate": 2.5143953934740883e-06, "loss": 0.8725, "step": 780 }, { "epoch": 0.7495201535508638, "grad_norm": 0.095703125, "learning_rate": 2.504798464491363e-06, "loss": 0.9092, "step": 781 }, { "epoch": 0.7504798464491362, "grad_norm": 0.1044921875, "learning_rate": 2.4952015355086374e-06, "loss": 0.8934, "step": 782 }, { "epoch": 0.7514395393474088, "grad_norm": 0.12158203125, "learning_rate": 2.4856046065259117e-06, "loss": 0.9178, "step": 783 }, { "epoch": 0.7523992322456814, "grad_norm": 0.0986328125, "learning_rate": 2.4760076775431864e-06, "loss": 0.8888, "step": 784 }, { "epoch": 0.753358925143954, "grad_norm": 0.1044921875, "learning_rate": 2.4664107485604608e-06, "loss": 0.9234, "step": 785 }, { "epoch": 0.7543186180422264, "grad_norm": 0.09033203125, "learning_rate": 2.456813819577735e-06, "loss": 0.8478, "step": 786 }, { "epoch": 0.755278310940499, "grad_norm": 0.103515625, "learning_rate": 2.44721689059501e-06, "loss": 0.9564, "step": 787 }, { "epoch": 0.7562380038387716, "grad_norm": 0.09521484375, "learning_rate": 2.437619961612284e-06, "loss": 0.8473, "step": 788 }, { "epoch": 0.7571976967370442, "grad_norm": 0.10400390625, "learning_rate": 2.4280230326295585e-06, "loss": 0.9126, "step": 789 }, { "epoch": 0.7581573896353166, "grad_norm": 0.10009765625, "learning_rate": 2.4184261036468333e-06, "loss": 0.9022, "step": 790 }, { "epoch": 0.7591170825335892, "grad_norm": 0.08251953125, "learning_rate": 2.4088291746641076e-06, "loss": 0.8685, "step": 791 }, { "epoch": 0.7600767754318618, "grad_norm": 0.1640625, "learning_rate": 2.3992322456813823e-06, "loss": 1.0382, "step": 792 }, { "epoch": 0.7610364683301344, "grad_norm": 0.103515625, "learning_rate": 2.3896353166986567e-06, "loss": 0.888, "step": 793 }, { "epoch": 0.761996161228407, "grad_norm": 0.099609375, "learning_rate": 2.380038387715931e-06, "loss": 0.8546, "step": 794 }, { "epoch": 0.7629558541266794, "grad_norm": 0.09814453125, "learning_rate": 2.3704414587332057e-06, "loss": 0.8766, "step": 795 }, { "epoch": 0.763915547024952, "grad_norm": 0.0927734375, "learning_rate": 2.36084452975048e-06, "loss": 0.8559, "step": 796 }, { "epoch": 0.7648752399232246, "grad_norm": 0.09521484375, "learning_rate": 2.3512476007677544e-06, "loss": 0.8919, "step": 797 }, { "epoch": 0.7658349328214972, "grad_norm": 0.1572265625, "learning_rate": 2.341650671785029e-06, "loss": 1.005, "step": 798 }, { "epoch": 0.7667946257197696, "grad_norm": 0.1318359375, "learning_rate": 2.3320537428023035e-06, "loss": 0.8963, "step": 799 }, { "epoch": 0.7677543186180422, "grad_norm": 0.109375, "learning_rate": 2.322456813819578e-06, "loss": 0.9342, "step": 800 }, { "epoch": 0.7687140115163148, "grad_norm": 0.099609375, "learning_rate": 2.3128598848368525e-06, "loss": 0.904, "step": 801 }, { "epoch": 0.7696737044145874, "grad_norm": 0.08251953125, "learning_rate": 2.303262955854127e-06, "loss": 0.8932, "step": 802 }, { "epoch": 0.7706333973128598, "grad_norm": 0.08740234375, "learning_rate": 2.2936660268714016e-06, "loss": 0.8518, "step": 803 }, { "epoch": 0.7715930902111324, "grad_norm": 0.09765625, "learning_rate": 2.284069097888676e-06, "loss": 0.8464, "step": 804 }, { "epoch": 0.772552783109405, "grad_norm": 0.0888671875, "learning_rate": 2.2744721689059503e-06, "loss": 0.8479, "step": 805 }, { "epoch": 0.7735124760076776, "grad_norm": 0.08837890625, "learning_rate": 2.264875239923225e-06, "loss": 0.8904, "step": 806 }, { "epoch": 0.77447216890595, "grad_norm": 0.0986328125, "learning_rate": 2.255278310940499e-06, "loss": 0.8472, "step": 807 }, { "epoch": 0.7754318618042226, "grad_norm": 0.09814453125, "learning_rate": 2.2456813819577737e-06, "loss": 0.9097, "step": 808 }, { "epoch": 0.7763915547024952, "grad_norm": 0.11328125, "learning_rate": 2.236084452975048e-06, "loss": 0.9732, "step": 809 }, { "epoch": 0.7773512476007678, "grad_norm": 0.1025390625, "learning_rate": 2.2264875239923228e-06, "loss": 0.8784, "step": 810 }, { "epoch": 0.7783109404990403, "grad_norm": 0.11572265625, "learning_rate": 2.216890595009597e-06, "loss": 0.9363, "step": 811 }, { "epoch": 0.7792706333973128, "grad_norm": 0.12890625, "learning_rate": 2.2072936660268714e-06, "loss": 0.9103, "step": 812 }, { "epoch": 0.7802303262955854, "grad_norm": 0.0966796875, "learning_rate": 2.197696737044146e-06, "loss": 0.9041, "step": 813 }, { "epoch": 0.781190019193858, "grad_norm": 0.1484375, "learning_rate": 2.1880998080614205e-06, "loss": 1.0047, "step": 814 }, { "epoch": 0.7821497120921305, "grad_norm": 0.10107421875, "learning_rate": 2.178502879078695e-06, "loss": 0.8674, "step": 815 }, { "epoch": 0.783109404990403, "grad_norm": 0.10546875, "learning_rate": 2.1689059500959696e-06, "loss": 0.945, "step": 816 }, { "epoch": 0.7840690978886756, "grad_norm": 0.0947265625, "learning_rate": 2.159309021113244e-06, "loss": 0.9308, "step": 817 }, { "epoch": 0.7850287907869482, "grad_norm": 0.087890625, "learning_rate": 2.149712092130518e-06, "loss": 0.8592, "step": 818 }, { "epoch": 0.7859884836852208, "grad_norm": 0.10693359375, "learning_rate": 2.140115163147793e-06, "loss": 0.8601, "step": 819 }, { "epoch": 0.7869481765834933, "grad_norm": 0.1025390625, "learning_rate": 2.1305182341650673e-06, "loss": 0.908, "step": 820 }, { "epoch": 0.7879078694817658, "grad_norm": 0.10546875, "learning_rate": 2.120921305182342e-06, "loss": 0.8875, "step": 821 }, { "epoch": 0.7888675623800384, "grad_norm": 0.10595703125, "learning_rate": 2.1113243761996164e-06, "loss": 0.9093, "step": 822 }, { "epoch": 0.789827255278311, "grad_norm": 0.091796875, "learning_rate": 2.1017274472168907e-06, "loss": 0.8605, "step": 823 }, { "epoch": 0.7907869481765835, "grad_norm": 0.08349609375, "learning_rate": 2.0921305182341654e-06, "loss": 0.839, "step": 824 }, { "epoch": 0.791746641074856, "grad_norm": 0.11181640625, "learning_rate": 2.0825335892514398e-06, "loss": 0.9268, "step": 825 }, { "epoch": 0.7927063339731286, "grad_norm": 0.0830078125, "learning_rate": 2.072936660268714e-06, "loss": 0.8654, "step": 826 }, { "epoch": 0.7936660268714012, "grad_norm": 0.09375, "learning_rate": 2.063339731285989e-06, "loss": 0.8853, "step": 827 }, { "epoch": 0.7946257197696737, "grad_norm": 0.08642578125, "learning_rate": 2.053742802303263e-06, "loss": 0.9269, "step": 828 }, { "epoch": 0.7955854126679462, "grad_norm": 0.08447265625, "learning_rate": 2.044145873320538e-06, "loss": 0.8849, "step": 829 }, { "epoch": 0.7965451055662188, "grad_norm": 0.1044921875, "learning_rate": 2.0345489443378122e-06, "loss": 0.8988, "step": 830 }, { "epoch": 0.7975047984644914, "grad_norm": 0.1484375, "learning_rate": 2.0249520153550866e-06, "loss": 0.9887, "step": 831 }, { "epoch": 0.7984644913627639, "grad_norm": 0.08642578125, "learning_rate": 2.015355086372361e-06, "loss": 0.818, "step": 832 }, { "epoch": 0.7994241842610365, "grad_norm": 0.1259765625, "learning_rate": 2.0057581573896352e-06, "loss": 0.9224, "step": 833 }, { "epoch": 0.800383877159309, "grad_norm": 0.1318359375, "learning_rate": 1.99616122840691e-06, "loss": 0.9942, "step": 834 }, { "epoch": 0.8013435700575816, "grad_norm": 0.1005859375, "learning_rate": 1.9865642994241843e-06, "loss": 0.8478, "step": 835 }, { "epoch": 0.8023032629558541, "grad_norm": 0.1728515625, "learning_rate": 1.9769673704414586e-06, "loss": 0.8404, "step": 836 }, { "epoch": 0.8032629558541267, "grad_norm": 0.09375, "learning_rate": 1.9673704414587334e-06, "loss": 0.8551, "step": 837 }, { "epoch": 0.8042226487523992, "grad_norm": 0.09814453125, "learning_rate": 1.9577735124760077e-06, "loss": 0.9121, "step": 838 }, { "epoch": 0.8051823416506718, "grad_norm": 0.10400390625, "learning_rate": 1.9481765834932825e-06, "loss": 0.9308, "step": 839 }, { "epoch": 0.8061420345489443, "grad_norm": 0.11865234375, "learning_rate": 1.9385796545105568e-06, "loss": 0.9405, "step": 840 }, { "epoch": 0.8071017274472169, "grad_norm": 0.0908203125, "learning_rate": 1.928982725527831e-06, "loss": 0.8425, "step": 841 }, { "epoch": 0.8080614203454894, "grad_norm": 0.10205078125, "learning_rate": 1.919385796545106e-06, "loss": 0.9141, "step": 842 }, { "epoch": 0.809021113243762, "grad_norm": 0.09326171875, "learning_rate": 1.90978886756238e-06, "loss": 0.8844, "step": 843 }, { "epoch": 0.8099808061420346, "grad_norm": 0.1015625, "learning_rate": 1.9001919385796547e-06, "loss": 0.8686, "step": 844 }, { "epoch": 0.8109404990403071, "grad_norm": 0.11181640625, "learning_rate": 1.890595009596929e-06, "loss": 0.9215, "step": 845 }, { "epoch": 0.8119001919385797, "grad_norm": 0.09228515625, "learning_rate": 1.8809980806142036e-06, "loss": 0.873, "step": 846 }, { "epoch": 0.8128598848368522, "grad_norm": 0.0908203125, "learning_rate": 1.8714011516314781e-06, "loss": 0.8606, "step": 847 }, { "epoch": 0.8138195777351248, "grad_norm": 0.1015625, "learning_rate": 1.8618042226487527e-06, "loss": 1.0169, "step": 848 }, { "epoch": 0.8147792706333973, "grad_norm": 0.111328125, "learning_rate": 1.852207293666027e-06, "loss": 0.9263, "step": 849 }, { "epoch": 0.8157389635316699, "grad_norm": 0.0947265625, "learning_rate": 1.8426103646833015e-06, "loss": 0.9282, "step": 850 }, { "epoch": 0.8166986564299424, "grad_norm": 0.09326171875, "learning_rate": 1.833013435700576e-06, "loss": 0.8512, "step": 851 }, { "epoch": 0.817658349328215, "grad_norm": 0.15625, "learning_rate": 1.8234165067178506e-06, "loss": 0.9415, "step": 852 }, { "epoch": 0.8186180422264875, "grad_norm": 0.09521484375, "learning_rate": 1.813819577735125e-06, "loss": 0.8646, "step": 853 }, { "epoch": 0.8195777351247601, "grad_norm": 0.09912109375, "learning_rate": 1.8042226487523995e-06, "loss": 0.9663, "step": 854 }, { "epoch": 0.8205374280230326, "grad_norm": 0.09521484375, "learning_rate": 1.794625719769674e-06, "loss": 0.855, "step": 855 }, { "epoch": 0.8214971209213052, "grad_norm": 0.10595703125, "learning_rate": 1.7850287907869481e-06, "loss": 0.8448, "step": 856 }, { "epoch": 0.8224568138195777, "grad_norm": 0.111328125, "learning_rate": 1.7754318618042227e-06, "loss": 0.9227, "step": 857 }, { "epoch": 0.8234165067178503, "grad_norm": 0.095703125, "learning_rate": 1.7658349328214972e-06, "loss": 0.8926, "step": 858 }, { "epoch": 0.8243761996161229, "grad_norm": 0.09619140625, "learning_rate": 1.7562380038387715e-06, "loss": 0.8505, "step": 859 }, { "epoch": 0.8253358925143954, "grad_norm": 0.09814453125, "learning_rate": 1.746641074856046e-06, "loss": 0.9039, "step": 860 }, { "epoch": 0.8262955854126679, "grad_norm": 0.10205078125, "learning_rate": 1.7370441458733206e-06, "loss": 0.9247, "step": 861 }, { "epoch": 0.8272552783109405, "grad_norm": 0.09912109375, "learning_rate": 1.7274472168905951e-06, "loss": 0.8928, "step": 862 }, { "epoch": 0.8282149712092131, "grad_norm": 0.09228515625, "learning_rate": 1.7178502879078695e-06, "loss": 0.9062, "step": 863 }, { "epoch": 0.8291746641074856, "grad_norm": 0.08984375, "learning_rate": 1.708253358925144e-06, "loss": 0.8303, "step": 864 }, { "epoch": 0.8301343570057581, "grad_norm": 0.0859375, "learning_rate": 1.6986564299424186e-06, "loss": 0.8619, "step": 865 }, { "epoch": 0.8310940499040307, "grad_norm": 0.1025390625, "learning_rate": 1.689059500959693e-06, "loss": 0.8517, "step": 866 }, { "epoch": 0.8320537428023033, "grad_norm": 0.1181640625, "learning_rate": 1.6794625719769674e-06, "loss": 1.0451, "step": 867 }, { "epoch": 0.8330134357005758, "grad_norm": 0.10107421875, "learning_rate": 1.669865642994242e-06, "loss": 0.8912, "step": 868 }, { "epoch": 0.8339731285988484, "grad_norm": 0.11572265625, "learning_rate": 1.6602687140115165e-06, "loss": 0.9978, "step": 869 }, { "epoch": 0.8349328214971209, "grad_norm": 0.11279296875, "learning_rate": 1.650671785028791e-06, "loss": 0.948, "step": 870 }, { "epoch": 0.8358925143953935, "grad_norm": 0.10693359375, "learning_rate": 1.6410748560460654e-06, "loss": 0.9118, "step": 871 }, { "epoch": 0.836852207293666, "grad_norm": 0.08984375, "learning_rate": 1.63147792706334e-06, "loss": 0.9054, "step": 872 }, { "epoch": 0.8378119001919386, "grad_norm": 0.091796875, "learning_rate": 1.6218809980806144e-06, "loss": 0.8954, "step": 873 }, { "epoch": 0.8387715930902111, "grad_norm": 0.09423828125, "learning_rate": 1.612284069097889e-06, "loss": 0.8979, "step": 874 }, { "epoch": 0.8397312859884837, "grad_norm": 0.08642578125, "learning_rate": 1.6026871401151633e-06, "loss": 0.9078, "step": 875 }, { "epoch": 0.8406909788867563, "grad_norm": 0.0888671875, "learning_rate": 1.5930902111324378e-06, "loss": 0.8511, "step": 876 }, { "epoch": 0.8416506717850288, "grad_norm": 0.09521484375, "learning_rate": 1.5834932821497124e-06, "loss": 0.8885, "step": 877 }, { "epoch": 0.8426103646833013, "grad_norm": 0.12451171875, "learning_rate": 1.5738963531669867e-06, "loss": 0.9479, "step": 878 }, { "epoch": 0.8435700575815739, "grad_norm": 0.08642578125, "learning_rate": 1.5642994241842612e-06, "loss": 0.8318, "step": 879 }, { "epoch": 0.8445297504798465, "grad_norm": 0.08837890625, "learning_rate": 1.5547024952015358e-06, "loss": 0.9085, "step": 880 }, { "epoch": 0.845489443378119, "grad_norm": 0.09033203125, "learning_rate": 1.54510556621881e-06, "loss": 0.8787, "step": 881 }, { "epoch": 0.8464491362763915, "grad_norm": 0.08935546875, "learning_rate": 1.5355086372360844e-06, "loss": 0.875, "step": 882 }, { "epoch": 0.8474088291746641, "grad_norm": 0.09521484375, "learning_rate": 1.525911708253359e-06, "loss": 0.8904, "step": 883 }, { "epoch": 0.8483685220729367, "grad_norm": 0.15625, "learning_rate": 1.5163147792706335e-06, "loss": 0.9297, "step": 884 }, { "epoch": 0.8493282149712092, "grad_norm": 0.10498046875, "learning_rate": 1.5067178502879078e-06, "loss": 0.8951, "step": 885 }, { "epoch": 0.8502879078694817, "grad_norm": 0.0908203125, "learning_rate": 1.4971209213051824e-06, "loss": 0.8508, "step": 886 }, { "epoch": 0.8512476007677543, "grad_norm": 0.10302734375, "learning_rate": 1.487523992322457e-06, "loss": 0.924, "step": 887 }, { "epoch": 0.8522072936660269, "grad_norm": 0.08935546875, "learning_rate": 1.4779270633397312e-06, "loss": 0.9006, "step": 888 }, { "epoch": 0.8531669865642995, "grad_norm": 0.08203125, "learning_rate": 1.4683301343570058e-06, "loss": 0.9105, "step": 889 }, { "epoch": 0.8541266794625719, "grad_norm": 0.09423828125, "learning_rate": 1.4587332053742803e-06, "loss": 0.9097, "step": 890 }, { "epoch": 0.8550863723608445, "grad_norm": 0.08935546875, "learning_rate": 1.4491362763915549e-06, "loss": 0.8719, "step": 891 }, { "epoch": 0.8560460652591171, "grad_norm": 0.10888671875, "learning_rate": 1.4395393474088292e-06, "loss": 0.9129, "step": 892 }, { "epoch": 0.8570057581573897, "grad_norm": 0.08642578125, "learning_rate": 1.4299424184261037e-06, "loss": 0.818, "step": 893 }, { "epoch": 0.8579654510556622, "grad_norm": 0.09521484375, "learning_rate": 1.4203454894433783e-06, "loss": 0.8259, "step": 894 }, { "epoch": 0.8589251439539347, "grad_norm": 0.09912109375, "learning_rate": 1.4107485604606528e-06, "loss": 0.9523, "step": 895 }, { "epoch": 0.8598848368522073, "grad_norm": 0.09912109375, "learning_rate": 1.4011516314779271e-06, "loss": 0.8779, "step": 896 }, { "epoch": 0.8608445297504799, "grad_norm": 0.09228515625, "learning_rate": 1.3915547024952017e-06, "loss": 0.8471, "step": 897 }, { "epoch": 0.8618042226487524, "grad_norm": 0.09716796875, "learning_rate": 1.3819577735124762e-06, "loss": 0.8541, "step": 898 }, { "epoch": 0.8627639155470249, "grad_norm": 0.08935546875, "learning_rate": 1.3723608445297507e-06, "loss": 0.8406, "step": 899 }, { "epoch": 0.8637236084452975, "grad_norm": 0.09228515625, "learning_rate": 1.362763915547025e-06, "loss": 0.9321, "step": 900 }, { "epoch": 0.8646833013435701, "grad_norm": 0.10009765625, "learning_rate": 1.3531669865642996e-06, "loss": 0.8372, "step": 901 }, { "epoch": 0.8656429942418427, "grad_norm": 0.1201171875, "learning_rate": 1.3435700575815741e-06, "loss": 0.9387, "step": 902 }, { "epoch": 0.8666026871401151, "grad_norm": 0.09033203125, "learning_rate": 1.3339731285988487e-06, "loss": 0.8453, "step": 903 }, { "epoch": 0.8675623800383877, "grad_norm": 0.10107421875, "learning_rate": 1.324376199616123e-06, "loss": 0.8814, "step": 904 }, { "epoch": 0.8685220729366603, "grad_norm": 0.142578125, "learning_rate": 1.3147792706333976e-06, "loss": 0.8979, "step": 905 }, { "epoch": 0.8694817658349329, "grad_norm": 0.09130859375, "learning_rate": 1.3051823416506717e-06, "loss": 0.9195, "step": 906 }, { "epoch": 0.8704414587332053, "grad_norm": 0.0927734375, "learning_rate": 1.2955854126679462e-06, "loss": 0.8919, "step": 907 }, { "epoch": 0.8714011516314779, "grad_norm": 0.09716796875, "learning_rate": 1.2859884836852207e-06, "loss": 0.8825, "step": 908 }, { "epoch": 0.8723608445297505, "grad_norm": 0.0908203125, "learning_rate": 1.2763915547024953e-06, "loss": 0.7967, "step": 909 }, { "epoch": 0.8733205374280231, "grad_norm": 0.0986328125, "learning_rate": 1.2667946257197696e-06, "loss": 0.8685, "step": 910 }, { "epoch": 0.8742802303262955, "grad_norm": 0.10986328125, "learning_rate": 1.2571976967370441e-06, "loss": 0.8319, "step": 911 }, { "epoch": 0.8752399232245681, "grad_norm": 0.15625, "learning_rate": 1.2476007677543187e-06, "loss": 1.0421, "step": 912 }, { "epoch": 0.8761996161228407, "grad_norm": 0.125, "learning_rate": 1.2380038387715932e-06, "loss": 1.02, "step": 913 }, { "epoch": 0.8771593090211133, "grad_norm": 0.09228515625, "learning_rate": 1.2284069097888675e-06, "loss": 0.8858, "step": 914 }, { "epoch": 0.8781190019193857, "grad_norm": 0.10205078125, "learning_rate": 1.218809980806142e-06, "loss": 0.903, "step": 915 }, { "epoch": 0.8790786948176583, "grad_norm": 0.1298828125, "learning_rate": 1.2092130518234166e-06, "loss": 0.9122, "step": 916 }, { "epoch": 0.8800383877159309, "grad_norm": 0.0927734375, "learning_rate": 1.1996161228406912e-06, "loss": 0.8541, "step": 917 }, { "epoch": 0.8809980806142035, "grad_norm": 0.1025390625, "learning_rate": 1.1900191938579655e-06, "loss": 0.8816, "step": 918 }, { "epoch": 0.8819577735124761, "grad_norm": 0.1064453125, "learning_rate": 1.18042226487524e-06, "loss": 0.8834, "step": 919 }, { "epoch": 0.8829174664107485, "grad_norm": 0.10302734375, "learning_rate": 1.1708253358925146e-06, "loss": 0.9132, "step": 920 }, { "epoch": 0.8838771593090211, "grad_norm": 0.09814453125, "learning_rate": 1.161228406909789e-06, "loss": 0.9044, "step": 921 }, { "epoch": 0.8848368522072937, "grad_norm": 0.0986328125, "learning_rate": 1.1516314779270634e-06, "loss": 0.8791, "step": 922 }, { "epoch": 0.8857965451055663, "grad_norm": 0.150390625, "learning_rate": 1.142034548944338e-06, "loss": 0.8169, "step": 923 }, { "epoch": 0.8867562380038387, "grad_norm": 0.08935546875, "learning_rate": 1.1324376199616125e-06, "loss": 0.8696, "step": 924 }, { "epoch": 0.8877159309021113, "grad_norm": 0.1025390625, "learning_rate": 1.1228406909788868e-06, "loss": 0.908, "step": 925 }, { "epoch": 0.8886756238003839, "grad_norm": 0.09716796875, "learning_rate": 1.1132437619961614e-06, "loss": 0.8748, "step": 926 }, { "epoch": 0.8896353166986565, "grad_norm": 0.08251953125, "learning_rate": 1.1036468330134357e-06, "loss": 0.8716, "step": 927 }, { "epoch": 0.8905950095969289, "grad_norm": 0.09814453125, "learning_rate": 1.0940499040307102e-06, "loss": 0.8613, "step": 928 }, { "epoch": 0.8915547024952015, "grad_norm": 0.091796875, "learning_rate": 1.0844529750479848e-06, "loss": 0.8703, "step": 929 }, { "epoch": 0.8925143953934741, "grad_norm": 0.08740234375, "learning_rate": 1.074856046065259e-06, "loss": 0.8743, "step": 930 }, { "epoch": 0.8934740882917467, "grad_norm": 0.10400390625, "learning_rate": 1.0652591170825336e-06, "loss": 0.8926, "step": 931 }, { "epoch": 0.8944337811900192, "grad_norm": 0.08642578125, "learning_rate": 1.0556621880998082e-06, "loss": 0.8598, "step": 932 }, { "epoch": 0.8953934740882917, "grad_norm": 0.1005859375, "learning_rate": 1.0460652591170827e-06, "loss": 0.9332, "step": 933 }, { "epoch": 0.8963531669865643, "grad_norm": 0.10693359375, "learning_rate": 1.036468330134357e-06, "loss": 0.9341, "step": 934 }, { "epoch": 0.8973128598848369, "grad_norm": 0.12890625, "learning_rate": 1.0268714011516316e-06, "loss": 0.9819, "step": 935 }, { "epoch": 0.8982725527831094, "grad_norm": 0.1005859375, "learning_rate": 1.0172744721689061e-06, "loss": 0.8789, "step": 936 }, { "epoch": 0.8992322456813819, "grad_norm": 0.1513671875, "learning_rate": 1.0076775431861805e-06, "loss": 1.0342, "step": 937 }, { "epoch": 0.9001919385796545, "grad_norm": 0.10498046875, "learning_rate": 9.98080614203455e-07, "loss": 0.8745, "step": 938 }, { "epoch": 0.9011516314779271, "grad_norm": 0.1123046875, "learning_rate": 9.884836852207293e-07, "loss": 0.8975, "step": 939 }, { "epoch": 0.9021113243761996, "grad_norm": 0.08447265625, "learning_rate": 9.788867562380039e-07, "loss": 0.9254, "step": 940 }, { "epoch": 0.9030710172744721, "grad_norm": 0.09326171875, "learning_rate": 9.692898272552784e-07, "loss": 0.8642, "step": 941 }, { "epoch": 0.9040307101727447, "grad_norm": 0.1591796875, "learning_rate": 9.59692898272553e-07, "loss": 1.0205, "step": 942 }, { "epoch": 0.9049904030710173, "grad_norm": 0.09521484375, "learning_rate": 9.500959692898274e-07, "loss": 0.8844, "step": 943 }, { "epoch": 0.9059500959692899, "grad_norm": 0.107421875, "learning_rate": 9.404990403071018e-07, "loss": 0.8562, "step": 944 }, { "epoch": 0.9069097888675623, "grad_norm": 0.09375, "learning_rate": 9.309021113243763e-07, "loss": 0.8775, "step": 945 }, { "epoch": 0.9078694817658349, "grad_norm": 0.0869140625, "learning_rate": 9.213051823416508e-07, "loss": 0.8285, "step": 946 }, { "epoch": 0.9088291746641075, "grad_norm": 0.09521484375, "learning_rate": 9.117082533589253e-07, "loss": 0.8177, "step": 947 }, { "epoch": 0.9097888675623801, "grad_norm": 0.11279296875, "learning_rate": 9.021113243761997e-07, "loss": 0.939, "step": 948 }, { "epoch": 0.9107485604606526, "grad_norm": 0.08447265625, "learning_rate": 8.925143953934741e-07, "loss": 0.8413, "step": 949 }, { "epoch": 0.9117082533589251, "grad_norm": 0.099609375, "learning_rate": 8.829174664107486e-07, "loss": 0.9704, "step": 950 }, { "epoch": 0.9126679462571977, "grad_norm": 0.158203125, "learning_rate": 8.73320537428023e-07, "loss": 0.9006, "step": 951 }, { "epoch": 0.9136276391554703, "grad_norm": 0.11279296875, "learning_rate": 8.637236084452976e-07, "loss": 0.8903, "step": 952 }, { "epoch": 0.9145873320537428, "grad_norm": 0.1298828125, "learning_rate": 8.54126679462572e-07, "loss": 0.9654, "step": 953 }, { "epoch": 0.9155470249520153, "grad_norm": 0.0966796875, "learning_rate": 8.445297504798465e-07, "loss": 0.9326, "step": 954 }, { "epoch": 0.9165067178502879, "grad_norm": 0.09814453125, "learning_rate": 8.34932821497121e-07, "loss": 0.9402, "step": 955 }, { "epoch": 0.9174664107485605, "grad_norm": 0.11376953125, "learning_rate": 8.253358925143955e-07, "loss": 0.9127, "step": 956 }, { "epoch": 0.918426103646833, "grad_norm": 0.10986328125, "learning_rate": 8.1573896353167e-07, "loss": 0.8601, "step": 957 }, { "epoch": 0.9193857965451055, "grad_norm": 0.1015625, "learning_rate": 8.061420345489445e-07, "loss": 0.8914, "step": 958 }, { "epoch": 0.9203454894433781, "grad_norm": 0.271484375, "learning_rate": 7.965451055662189e-07, "loss": 0.8644, "step": 959 }, { "epoch": 0.9213051823416507, "grad_norm": 0.0908203125, "learning_rate": 7.869481765834934e-07, "loss": 0.876, "step": 960 }, { "epoch": 0.9222648752399232, "grad_norm": 0.11328125, "learning_rate": 7.773512476007679e-07, "loss": 0.9974, "step": 961 }, { "epoch": 0.9232245681381958, "grad_norm": 0.103515625, "learning_rate": 7.677543186180422e-07, "loss": 0.9056, "step": 962 }, { "epoch": 0.9241842610364683, "grad_norm": 0.1611328125, "learning_rate": 7.581573896353168e-07, "loss": 1.0843, "step": 963 }, { "epoch": 0.9251439539347409, "grad_norm": 0.0966796875, "learning_rate": 7.485604606525912e-07, "loss": 0.9126, "step": 964 }, { "epoch": 0.9261036468330134, "grad_norm": 0.1103515625, "learning_rate": 7.389635316698656e-07, "loss": 0.8867, "step": 965 }, { "epoch": 0.927063339731286, "grad_norm": 0.1025390625, "learning_rate": 7.293666026871402e-07, "loss": 0.881, "step": 966 }, { "epoch": 0.9280230326295585, "grad_norm": 0.10205078125, "learning_rate": 7.197696737044146e-07, "loss": 0.8719, "step": 967 }, { "epoch": 0.9289827255278311, "grad_norm": 0.0927734375, "learning_rate": 7.101727447216891e-07, "loss": 0.8987, "step": 968 }, { "epoch": 0.9299424184261037, "grad_norm": 0.1015625, "learning_rate": 7.005758157389636e-07, "loss": 0.905, "step": 969 }, { "epoch": 0.9309021113243762, "grad_norm": 0.091796875, "learning_rate": 6.909788867562381e-07, "loss": 0.838, "step": 970 }, { "epoch": 0.9318618042226487, "grad_norm": 0.099609375, "learning_rate": 6.813819577735125e-07, "loss": 0.939, "step": 971 }, { "epoch": 0.9328214971209213, "grad_norm": 0.103515625, "learning_rate": 6.717850287907871e-07, "loss": 0.9673, "step": 972 }, { "epoch": 0.9337811900191939, "grad_norm": 0.09521484375, "learning_rate": 6.621880998080615e-07, "loss": 0.8974, "step": 973 }, { "epoch": 0.9347408829174664, "grad_norm": 0.1005859375, "learning_rate": 6.525911708253358e-07, "loss": 0.8756, "step": 974 }, { "epoch": 0.935700575815739, "grad_norm": 0.1103515625, "learning_rate": 6.429942418426104e-07, "loss": 0.9782, "step": 975 }, { "epoch": 0.9366602687140115, "grad_norm": 0.10107421875, "learning_rate": 6.333973128598848e-07, "loss": 0.8836, "step": 976 }, { "epoch": 0.9376199616122841, "grad_norm": 0.1083984375, "learning_rate": 6.238003838771593e-07, "loss": 0.9716, "step": 977 }, { "epoch": 0.9385796545105566, "grad_norm": 0.08984375, "learning_rate": 6.142034548944338e-07, "loss": 0.861, "step": 978 }, { "epoch": 0.9395393474088292, "grad_norm": 0.12353515625, "learning_rate": 6.046065259117083e-07, "loss": 0.9611, "step": 979 }, { "epoch": 0.9404990403071017, "grad_norm": 0.10205078125, "learning_rate": 5.950095969289827e-07, "loss": 0.9402, "step": 980 }, { "epoch": 0.9414587332053743, "grad_norm": 0.08447265625, "learning_rate": 5.854126679462573e-07, "loss": 0.8861, "step": 981 }, { "epoch": 0.9424184261036468, "grad_norm": 0.1025390625, "learning_rate": 5.758157389635317e-07, "loss": 0.9267, "step": 982 }, { "epoch": 0.9433781190019194, "grad_norm": 0.1064453125, "learning_rate": 5.662188099808063e-07, "loss": 0.9603, "step": 983 }, { "epoch": 0.944337811900192, "grad_norm": 0.095703125, "learning_rate": 5.566218809980807e-07, "loss": 0.8911, "step": 984 }, { "epoch": 0.9452975047984645, "grad_norm": 0.08837890625, "learning_rate": 5.470249520153551e-07, "loss": 0.8627, "step": 985 }, { "epoch": 0.946257197696737, "grad_norm": 0.10107421875, "learning_rate": 5.374280230326296e-07, "loss": 0.9447, "step": 986 }, { "epoch": 0.9472168905950096, "grad_norm": 0.10205078125, "learning_rate": 5.278310940499041e-07, "loss": 0.9111, "step": 987 }, { "epoch": 0.9481765834932822, "grad_norm": 0.09716796875, "learning_rate": 5.182341650671785e-07, "loss": 0.9145, "step": 988 }, { "epoch": 0.9491362763915547, "grad_norm": 0.1005859375, "learning_rate": 5.086372360844531e-07, "loss": 0.9212, "step": 989 }, { "epoch": 0.9500959692898272, "grad_norm": 0.08349609375, "learning_rate": 4.990403071017275e-07, "loss": 0.9028, "step": 990 }, { "epoch": 0.9510556621880998, "grad_norm": 0.09375, "learning_rate": 4.894433781190019e-07, "loss": 0.9005, "step": 991 }, { "epoch": 0.9520153550863724, "grad_norm": 0.1044921875, "learning_rate": 4.798464491362765e-07, "loss": 0.915, "step": 992 }, { "epoch": 0.9529750479846449, "grad_norm": 0.095703125, "learning_rate": 4.702495201535509e-07, "loss": 0.8614, "step": 993 }, { "epoch": 0.9539347408829175, "grad_norm": 0.1025390625, "learning_rate": 4.606525911708254e-07, "loss": 0.8747, "step": 994 }, { "epoch": 0.95489443378119, "grad_norm": 0.0888671875, "learning_rate": 4.5105566218809987e-07, "loss": 0.8996, "step": 995 }, { "epoch": 0.9558541266794626, "grad_norm": 0.11572265625, "learning_rate": 4.414587332053743e-07, "loss": 0.8711, "step": 996 }, { "epoch": 0.9568138195777351, "grad_norm": 0.09423828125, "learning_rate": 4.318618042226488e-07, "loss": 0.8874, "step": 997 }, { "epoch": 0.9577735124760077, "grad_norm": 0.08740234375, "learning_rate": 4.2226487523992327e-07, "loss": 0.8792, "step": 998 }, { "epoch": 0.9587332053742802, "grad_norm": 0.12158203125, "learning_rate": 4.1266794625719776e-07, "loss": 0.9438, "step": 999 }, { "epoch": 0.9596928982725528, "grad_norm": 0.095703125, "learning_rate": 4.0307101727447224e-07, "loss": 0.8969, "step": 1000 }, { "epoch": 0.9606525911708254, "grad_norm": 0.09765625, "learning_rate": 3.934740882917467e-07, "loss": 0.9036, "step": 1001 }, { "epoch": 0.9616122840690979, "grad_norm": 0.095703125, "learning_rate": 3.838771593090211e-07, "loss": 0.8347, "step": 1002 }, { "epoch": 0.9625719769673704, "grad_norm": 0.11669921875, "learning_rate": 3.742802303262956e-07, "loss": 0.9452, "step": 1003 }, { "epoch": 0.963531669865643, "grad_norm": 0.099609375, "learning_rate": 3.646833013435701e-07, "loss": 0.9136, "step": 1004 }, { "epoch": 0.9644913627639156, "grad_norm": 0.0986328125, "learning_rate": 3.5508637236084457e-07, "loss": 0.8792, "step": 1005 }, { "epoch": 0.9654510556621881, "grad_norm": 0.111328125, "learning_rate": 3.4548944337811905e-07, "loss": 0.916, "step": 1006 }, { "epoch": 0.9664107485604606, "grad_norm": 0.10546875, "learning_rate": 3.3589251439539354e-07, "loss": 0.8992, "step": 1007 }, { "epoch": 0.9673704414587332, "grad_norm": 0.09765625, "learning_rate": 3.262955854126679e-07, "loss": 0.9242, "step": 1008 }, { "epoch": 0.9683301343570058, "grad_norm": 0.09912109375, "learning_rate": 3.166986564299424e-07, "loss": 0.91, "step": 1009 }, { "epoch": 0.9692898272552783, "grad_norm": 0.1005859375, "learning_rate": 3.071017274472169e-07, "loss": 0.873, "step": 1010 }, { "epoch": 0.9702495201535508, "grad_norm": 0.1015625, "learning_rate": 2.9750479846449137e-07, "loss": 0.9169, "step": 1011 }, { "epoch": 0.9712092130518234, "grad_norm": 0.11279296875, "learning_rate": 2.8790786948176586e-07, "loss": 0.9147, "step": 1012 }, { "epoch": 0.972168905950096, "grad_norm": 0.1220703125, "learning_rate": 2.7831094049904034e-07, "loss": 0.925, "step": 1013 }, { "epoch": 0.9731285988483686, "grad_norm": 0.0908203125, "learning_rate": 2.687140115163148e-07, "loss": 0.9201, "step": 1014 }, { "epoch": 0.974088291746641, "grad_norm": 0.09130859375, "learning_rate": 2.5911708253358926e-07, "loss": 0.8828, "step": 1015 }, { "epoch": 0.9750479846449136, "grad_norm": 0.119140625, "learning_rate": 2.4952015355086375e-07, "loss": 0.8624, "step": 1016 }, { "epoch": 0.9760076775431862, "grad_norm": 0.0859375, "learning_rate": 2.3992322456813823e-07, "loss": 0.8561, "step": 1017 }, { "epoch": 0.9769673704414588, "grad_norm": 0.0869140625, "learning_rate": 2.303262955854127e-07, "loss": 0.8079, "step": 1018 }, { "epoch": 0.9779270633397313, "grad_norm": 0.1240234375, "learning_rate": 2.2072936660268715e-07, "loss": 0.9034, "step": 1019 }, { "epoch": 0.9788867562380038, "grad_norm": 0.09130859375, "learning_rate": 2.1113243761996164e-07, "loss": 0.8095, "step": 1020 }, { "epoch": 0.9798464491362764, "grad_norm": 0.09814453125, "learning_rate": 2.0153550863723612e-07, "loss": 0.9089, "step": 1021 }, { "epoch": 0.980806142034549, "grad_norm": 0.0849609375, "learning_rate": 1.9193857965451055e-07, "loss": 0.8832, "step": 1022 }, { "epoch": 0.9817658349328215, "grad_norm": 0.11962890625, "learning_rate": 1.8234165067178504e-07, "loss": 0.9369, "step": 1023 }, { "epoch": 0.982725527831094, "grad_norm": 0.1748046875, "learning_rate": 1.7274472168905953e-07, "loss": 1.0612, "step": 1024 }, { "epoch": 0.9836852207293666, "grad_norm": 0.1015625, "learning_rate": 1.6314779270633396e-07, "loss": 0.9416, "step": 1025 }, { "epoch": 0.9846449136276392, "grad_norm": 0.1162109375, "learning_rate": 1.5355086372360844e-07, "loss": 0.9809, "step": 1026 }, { "epoch": 0.9856046065259118, "grad_norm": 0.10791015625, "learning_rate": 1.4395393474088293e-07, "loss": 0.842, "step": 1027 }, { "epoch": 0.9865642994241842, "grad_norm": 0.09130859375, "learning_rate": 1.343570057581574e-07, "loss": 0.9277, "step": 1028 }, { "epoch": 0.9875239923224568, "grad_norm": 0.1103515625, "learning_rate": 1.2476007677543187e-07, "loss": 0.8711, "step": 1029 }, { "epoch": 0.9884836852207294, "grad_norm": 0.08740234375, "learning_rate": 1.1516314779270635e-07, "loss": 0.8977, "step": 1030 }, { "epoch": 0.989443378119002, "grad_norm": 0.09912109375, "learning_rate": 1.0556621880998082e-07, "loss": 0.8632, "step": 1031 }, { "epoch": 0.9904030710172744, "grad_norm": 0.10107421875, "learning_rate": 9.596928982725528e-08, "loss": 0.9557, "step": 1032 }, { "epoch": 0.991362763915547, "grad_norm": 0.09765625, "learning_rate": 8.637236084452976e-08, "loss": 0.8898, "step": 1033 }, { "epoch": 0.9923224568138196, "grad_norm": 0.146484375, "learning_rate": 7.677543186180422e-08, "loss": 0.9666, "step": 1034 }, { "epoch": 0.9932821497120922, "grad_norm": 0.1015625, "learning_rate": 6.71785028790787e-08, "loss": 0.8335, "step": 1035 }, { "epoch": 0.9942418426103646, "grad_norm": 0.095703125, "learning_rate": 5.758157389635317e-08, "loss": 0.9066, "step": 1036 }, { "epoch": 0.9952015355086372, "grad_norm": 0.08740234375, "learning_rate": 4.798464491362764e-08, "loss": 0.8566, "step": 1037 }, { "epoch": 0.9961612284069098, "grad_norm": 0.09033203125, "learning_rate": 3.838771593090211e-08, "loss": 0.8087, "step": 1038 }, { "epoch": 0.9971209213051824, "grad_norm": 0.111328125, "learning_rate": 2.8790786948176586e-08, "loss": 0.8638, "step": 1039 }, { "epoch": 0.9980806142034548, "grad_norm": 0.09912109375, "learning_rate": 1.9193857965451055e-08, "loss": 0.9796, "step": 1040 }, { "epoch": 0.9990403071017274, "grad_norm": 0.11767578125, "learning_rate": 9.596928982725528e-09, "loss": 0.8987, "step": 1041 }, { "epoch": 1.0, "grad_norm": 0.09228515625, "learning_rate": 0.0, "loss": 0.897, "step": 1042 } ], "logging_steps": 1.0, "max_steps": 1042, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.362875479576019e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }