{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 1563, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0019193857965451055, "grad_norm": 7.7047858238220215, "learning_rate": 6.369426751592358e-08, "loss": 0.8923, "step": 1 }, { "epoch": 0.003838771593090211, "grad_norm": 7.626590251922607, "learning_rate": 1.2738853503184715e-07, "loss": 0.8761, "step": 2 }, { "epoch": 0.005758157389635317, "grad_norm": 6.952117443084717, "learning_rate": 1.9108280254777072e-07, "loss": 0.8441, "step": 3 }, { "epoch": 0.007677543186180422, "grad_norm": 7.109781742095947, "learning_rate": 2.547770700636943e-07, "loss": 0.8563, "step": 4 }, { "epoch": 0.009596928982725527, "grad_norm": 7.674124717712402, "learning_rate": 3.1847133757961787e-07, "loss": 0.8943, "step": 5 }, { "epoch": 0.011516314779270634, "grad_norm": 7.2651872634887695, "learning_rate": 3.8216560509554143e-07, "loss": 0.8734, "step": 6 }, { "epoch": 0.013435700575815739, "grad_norm": 7.7632365226745605, "learning_rate": 4.45859872611465e-07, "loss": 0.9109, "step": 7 }, { "epoch": 0.015355086372360844, "grad_norm": 7.804616451263428, "learning_rate": 5.095541401273886e-07, "loss": 0.8936, "step": 8 }, { "epoch": 0.01727447216890595, "grad_norm": 7.347078323364258, "learning_rate": 5.732484076433121e-07, "loss": 0.903, "step": 9 }, { "epoch": 0.019193857965451054, "grad_norm": 6.871998310089111, "learning_rate": 6.369426751592357e-07, "loss": 0.8845, "step": 10 }, { "epoch": 0.02111324376199616, "grad_norm": 7.332353115081787, "learning_rate": 7.006369426751592e-07, "loss": 0.923, "step": 11 }, { "epoch": 0.023032629558541268, "grad_norm": 6.834127426147461, "learning_rate": 7.643312101910829e-07, "loss": 0.8529, "step": 12 }, { "epoch": 0.02495201535508637, "grad_norm": 6.063100337982178, "learning_rate": 8.280254777070064e-07, "loss": 0.8492, "step": 13 }, { "epoch": 0.026871401151631478, "grad_norm": 5.636206150054932, "learning_rate": 8.9171974522293e-07, "loss": 0.817, "step": 14 }, { "epoch": 0.028790786948176585, "grad_norm": 5.292852878570557, "learning_rate": 9.554140127388537e-07, "loss": 0.7804, "step": 15 }, { "epoch": 0.030710172744721688, "grad_norm": 5.262109279632568, "learning_rate": 1.0191082802547772e-06, "loss": 0.8045, "step": 16 }, { "epoch": 0.03262955854126679, "grad_norm": 4.318991184234619, "learning_rate": 1.0828025477707007e-06, "loss": 0.7702, "step": 17 }, { "epoch": 0.0345489443378119, "grad_norm": 3.0764384269714355, "learning_rate": 1.1464968152866242e-06, "loss": 0.7487, "step": 18 }, { "epoch": 0.036468330134357005, "grad_norm": 3.011914014816284, "learning_rate": 1.210191082802548e-06, "loss": 0.7536, "step": 19 }, { "epoch": 0.03838771593090211, "grad_norm": 2.997311592102051, "learning_rate": 1.2738853503184715e-06, "loss": 0.7263, "step": 20 }, { "epoch": 0.04030710172744722, "grad_norm": 2.806544780731201, "learning_rate": 1.337579617834395e-06, "loss": 0.7147, "step": 21 }, { "epoch": 0.04222648752399232, "grad_norm": 2.4153246879577637, "learning_rate": 1.4012738853503185e-06, "loss": 0.7286, "step": 22 }, { "epoch": 0.044145873320537425, "grad_norm": 2.121650218963623, "learning_rate": 1.4649681528662422e-06, "loss": 0.717, "step": 23 }, { "epoch": 0.046065259117082535, "grad_norm": 2.442418098449707, "learning_rate": 1.5286624203821657e-06, "loss": 0.6633, "step": 24 }, { "epoch": 0.04798464491362764, "grad_norm": 2.7615859508514404, "learning_rate": 1.5923566878980892e-06, "loss": 0.677, "step": 25 }, { "epoch": 0.04990403071017274, "grad_norm": 2.9360690116882324, "learning_rate": 1.6560509554140127e-06, "loss": 0.6782, "step": 26 }, { "epoch": 0.05182341650671785, "grad_norm": 2.780909299850464, "learning_rate": 1.7197452229299363e-06, "loss": 0.6823, "step": 27 }, { "epoch": 0.053742802303262956, "grad_norm": 2.6617624759674072, "learning_rate": 1.78343949044586e-06, "loss": 0.6801, "step": 28 }, { "epoch": 0.05566218809980806, "grad_norm": 2.3536391258239746, "learning_rate": 1.8471337579617835e-06, "loss": 0.6536, "step": 29 }, { "epoch": 0.05758157389635317, "grad_norm": 2.112276315689087, "learning_rate": 1.9108280254777074e-06, "loss": 0.6626, "step": 30 }, { "epoch": 0.05950095969289827, "grad_norm": 1.7578476667404175, "learning_rate": 1.974522292993631e-06, "loss": 0.623, "step": 31 }, { "epoch": 0.061420345489443376, "grad_norm": 1.5121145248413086, "learning_rate": 2.0382165605095544e-06, "loss": 0.6121, "step": 32 }, { "epoch": 0.06333973128598848, "grad_norm": 1.3067290782928467, "learning_rate": 2.101910828025478e-06, "loss": 0.6302, "step": 33 }, { "epoch": 0.06525911708253358, "grad_norm": 1.4578447341918945, "learning_rate": 2.1656050955414015e-06, "loss": 0.6235, "step": 34 }, { "epoch": 0.0671785028790787, "grad_norm": 1.2118321657180786, "learning_rate": 2.229299363057325e-06, "loss": 0.6265, "step": 35 }, { "epoch": 0.0690978886756238, "grad_norm": 1.1362491846084595, "learning_rate": 2.2929936305732485e-06, "loss": 0.5953, "step": 36 }, { "epoch": 0.0710172744721689, "grad_norm": 1.108453631401062, "learning_rate": 2.356687898089172e-06, "loss": 0.6166, "step": 37 }, { "epoch": 0.07293666026871401, "grad_norm": 1.0258468389511108, "learning_rate": 2.420382165605096e-06, "loss": 0.5707, "step": 38 }, { "epoch": 0.07485604606525911, "grad_norm": 0.9499682188034058, "learning_rate": 2.4840764331210194e-06, "loss": 0.523, "step": 39 }, { "epoch": 0.07677543186180422, "grad_norm": 0.9794030785560608, "learning_rate": 2.547770700636943e-06, "loss": 0.5828, "step": 40 }, { "epoch": 0.07869481765834933, "grad_norm": 0.8348785638809204, "learning_rate": 2.6114649681528665e-06, "loss": 0.5765, "step": 41 }, { "epoch": 0.08061420345489444, "grad_norm": 0.713405191898346, "learning_rate": 2.67515923566879e-06, "loss": 0.5334, "step": 42 }, { "epoch": 0.08253358925143954, "grad_norm": 0.6945319175720215, "learning_rate": 2.7388535031847135e-06, "loss": 0.558, "step": 43 }, { "epoch": 0.08445297504798464, "grad_norm": 0.7046851515769958, "learning_rate": 2.802547770700637e-06, "loss": 0.5297, "step": 44 }, { "epoch": 0.08637236084452975, "grad_norm": 0.769294261932373, "learning_rate": 2.8662420382165605e-06, "loss": 0.5681, "step": 45 }, { "epoch": 0.08829174664107485, "grad_norm": 0.8055613040924072, "learning_rate": 2.9299363057324844e-06, "loss": 0.5272, "step": 46 }, { "epoch": 0.09021113243761997, "grad_norm": 0.7861132025718689, "learning_rate": 2.993630573248408e-06, "loss": 0.5442, "step": 47 }, { "epoch": 0.09213051823416507, "grad_norm": 0.6661452054977417, "learning_rate": 3.0573248407643314e-06, "loss": 0.5148, "step": 48 }, { "epoch": 0.09404990403071017, "grad_norm": 0.731586217880249, "learning_rate": 3.121019108280255e-06, "loss": 0.5361, "step": 49 }, { "epoch": 0.09596928982725528, "grad_norm": 0.6772072911262512, "learning_rate": 3.1847133757961785e-06, "loss": 0.5306, "step": 50 }, { "epoch": 0.09788867562380038, "grad_norm": 0.7305465936660767, "learning_rate": 3.248407643312102e-06, "loss": 0.518, "step": 51 }, { "epoch": 0.09980806142034548, "grad_norm": 0.6893017888069153, "learning_rate": 3.3121019108280255e-06, "loss": 0.5358, "step": 52 }, { "epoch": 0.1017274472168906, "grad_norm": 0.7044209837913513, "learning_rate": 3.375796178343949e-06, "loss": 0.5195, "step": 53 }, { "epoch": 0.1036468330134357, "grad_norm": 0.6983616948127747, "learning_rate": 3.4394904458598725e-06, "loss": 0.5265, "step": 54 }, { "epoch": 0.10556621880998081, "grad_norm": 0.6610928177833557, "learning_rate": 3.5031847133757964e-06, "loss": 0.5679, "step": 55 }, { "epoch": 0.10748560460652591, "grad_norm": 0.6470273733139038, "learning_rate": 3.56687898089172e-06, "loss": 0.5069, "step": 56 }, { "epoch": 0.10940499040307101, "grad_norm": 0.581302285194397, "learning_rate": 3.6305732484076435e-06, "loss": 0.5351, "step": 57 }, { "epoch": 0.11132437619961612, "grad_norm": 0.6296789050102234, "learning_rate": 3.694267515923567e-06, "loss": 0.5272, "step": 58 }, { "epoch": 0.11324376199616124, "grad_norm": 0.6180348992347717, "learning_rate": 3.757961783439491e-06, "loss": 0.4878, "step": 59 }, { "epoch": 0.11516314779270634, "grad_norm": 0.7312188744544983, "learning_rate": 3.821656050955415e-06, "loss": 0.4947, "step": 60 }, { "epoch": 0.11708253358925144, "grad_norm": 0.6846826076507568, "learning_rate": 3.885350318471338e-06, "loss": 0.5152, "step": 61 }, { "epoch": 0.11900191938579655, "grad_norm": 0.589976966381073, "learning_rate": 3.949044585987262e-06, "loss": 0.5036, "step": 62 }, { "epoch": 0.12092130518234165, "grad_norm": 0.588501513004303, "learning_rate": 4.012738853503185e-06, "loss": 0.5204, "step": 63 }, { "epoch": 0.12284069097888675, "grad_norm": 0.6650551557540894, "learning_rate": 4.076433121019109e-06, "loss": 0.5074, "step": 64 }, { "epoch": 0.12476007677543186, "grad_norm": 0.566895067691803, "learning_rate": 4.140127388535032e-06, "loss": 0.493, "step": 65 }, { "epoch": 0.12667946257197696, "grad_norm": 0.582000732421875, "learning_rate": 4.203821656050956e-06, "loss": 0.5461, "step": 66 }, { "epoch": 0.12859884836852206, "grad_norm": 0.5721346735954285, "learning_rate": 4.26751592356688e-06, "loss": 0.5221, "step": 67 }, { "epoch": 0.13051823416506717, "grad_norm": 0.5950515866279602, "learning_rate": 4.331210191082803e-06, "loss": 0.5295, "step": 68 }, { "epoch": 0.1324376199616123, "grad_norm": 0.607253909111023, "learning_rate": 4.394904458598727e-06, "loss": 0.493, "step": 69 }, { "epoch": 0.1343570057581574, "grad_norm": 0.60755455493927, "learning_rate": 4.45859872611465e-06, "loss": 0.4648, "step": 70 }, { "epoch": 0.1362763915547025, "grad_norm": 0.6038725972175598, "learning_rate": 4.522292993630574e-06, "loss": 0.4944, "step": 71 }, { "epoch": 0.1381957773512476, "grad_norm": 0.6124449372291565, "learning_rate": 4.585987261146497e-06, "loss": 0.5123, "step": 72 }, { "epoch": 0.1401151631477927, "grad_norm": 0.5548542737960815, "learning_rate": 4.649681528662421e-06, "loss": 0.485, "step": 73 }, { "epoch": 0.1420345489443378, "grad_norm": 0.6782580018043518, "learning_rate": 4.713375796178344e-06, "loss": 0.4895, "step": 74 }, { "epoch": 0.14395393474088292, "grad_norm": 0.6169801950454712, "learning_rate": 4.777070063694268e-06, "loss": 0.46, "step": 75 }, { "epoch": 0.14587332053742802, "grad_norm": 0.552150309085846, "learning_rate": 4.840764331210192e-06, "loss": 0.4875, "step": 76 }, { "epoch": 0.14779270633397312, "grad_norm": 0.5741500854492188, "learning_rate": 4.904458598726115e-06, "loss": 0.4837, "step": 77 }, { "epoch": 0.14971209213051823, "grad_norm": 0.6463334560394287, "learning_rate": 4.968152866242039e-06, "loss": 0.4887, "step": 78 }, { "epoch": 0.15163147792706333, "grad_norm": 0.6143442392349243, "learning_rate": 5.031847133757962e-06, "loss": 0.4719, "step": 79 }, { "epoch": 0.15355086372360843, "grad_norm": 0.5707758665084839, "learning_rate": 5.095541401273886e-06, "loss": 0.4865, "step": 80 }, { "epoch": 0.15547024952015356, "grad_norm": 0.5351803302764893, "learning_rate": 5.159235668789809e-06, "loss": 0.4603, "step": 81 }, { "epoch": 0.15738963531669867, "grad_norm": 0.585965096950531, "learning_rate": 5.222929936305733e-06, "loss": 0.4852, "step": 82 }, { "epoch": 0.15930902111324377, "grad_norm": 0.5982359647750854, "learning_rate": 5.286624203821657e-06, "loss": 0.4852, "step": 83 }, { "epoch": 0.16122840690978887, "grad_norm": 0.5438057780265808, "learning_rate": 5.35031847133758e-06, "loss": 0.4976, "step": 84 }, { "epoch": 0.16314779270633398, "grad_norm": 0.5908392071723938, "learning_rate": 5.414012738853504e-06, "loss": 0.497, "step": 85 }, { "epoch": 0.16506717850287908, "grad_norm": 0.5844436883926392, "learning_rate": 5.477707006369427e-06, "loss": 0.4733, "step": 86 }, { "epoch": 0.16698656429942418, "grad_norm": 0.6251052021980286, "learning_rate": 5.541401273885351e-06, "loss": 0.4578, "step": 87 }, { "epoch": 0.1689059500959693, "grad_norm": 0.6084348559379578, "learning_rate": 5.605095541401274e-06, "loss": 0.4929, "step": 88 }, { "epoch": 0.1708253358925144, "grad_norm": 0.6299620866775513, "learning_rate": 5.668789808917198e-06, "loss": 0.4785, "step": 89 }, { "epoch": 0.1727447216890595, "grad_norm": 0.6048820614814758, "learning_rate": 5.732484076433121e-06, "loss": 0.4772, "step": 90 }, { "epoch": 0.1746641074856046, "grad_norm": 0.6594638228416443, "learning_rate": 5.796178343949045e-06, "loss": 0.4717, "step": 91 }, { "epoch": 0.1765834932821497, "grad_norm": 0.7145056128501892, "learning_rate": 5.859872611464969e-06, "loss": 0.4927, "step": 92 }, { "epoch": 0.1785028790786948, "grad_norm": 0.6843922138214111, "learning_rate": 5.923566878980892e-06, "loss": 0.436, "step": 93 }, { "epoch": 0.18042226487523993, "grad_norm": 0.752804696559906, "learning_rate": 5.987261146496816e-06, "loss": 0.4819, "step": 94 }, { "epoch": 0.18234165067178504, "grad_norm": 0.6009435057640076, "learning_rate": 6.050955414012739e-06, "loss": 0.4873, "step": 95 }, { "epoch": 0.18426103646833014, "grad_norm": 0.7087578177452087, "learning_rate": 6.114649681528663e-06, "loss": 0.4522, "step": 96 }, { "epoch": 0.18618042226487524, "grad_norm": 0.6081191897392273, "learning_rate": 6.178343949044586e-06, "loss": 0.4756, "step": 97 }, { "epoch": 0.18809980806142035, "grad_norm": 0.6481821537017822, "learning_rate": 6.24203821656051e-06, "loss": 0.4684, "step": 98 }, { "epoch": 0.19001919385796545, "grad_norm": 0.5877583622932434, "learning_rate": 6.305732484076433e-06, "loss": 0.4804, "step": 99 }, { "epoch": 0.19193857965451055, "grad_norm": 0.6267633438110352, "learning_rate": 6.369426751592357e-06, "loss": 0.4648, "step": 100 }, { "epoch": 0.19385796545105566, "grad_norm": 0.7318799495697021, "learning_rate": 6.433121019108281e-06, "loss": 0.4716, "step": 101 }, { "epoch": 0.19577735124760076, "grad_norm": 0.6244235038757324, "learning_rate": 6.496815286624204e-06, "loss": 0.4431, "step": 102 }, { "epoch": 0.19769673704414586, "grad_norm": 0.6711713075637817, "learning_rate": 6.560509554140128e-06, "loss": 0.469, "step": 103 }, { "epoch": 0.19961612284069097, "grad_norm": 0.7523443102836609, "learning_rate": 6.624203821656051e-06, "loss": 0.4777, "step": 104 }, { "epoch": 0.20153550863723607, "grad_norm": 0.7269445061683655, "learning_rate": 6.687898089171975e-06, "loss": 0.4631, "step": 105 }, { "epoch": 0.2034548944337812, "grad_norm": 0.6177531480789185, "learning_rate": 6.751592356687898e-06, "loss": 0.4632, "step": 106 }, { "epoch": 0.2053742802303263, "grad_norm": 0.5727670192718506, "learning_rate": 6.815286624203822e-06, "loss": 0.4546, "step": 107 }, { "epoch": 0.2072936660268714, "grad_norm": 0.7229476571083069, "learning_rate": 6.878980891719745e-06, "loss": 0.4592, "step": 108 }, { "epoch": 0.2092130518234165, "grad_norm": 0.6764940619468689, "learning_rate": 6.942675159235669e-06, "loss": 0.4833, "step": 109 }, { "epoch": 0.21113243761996162, "grad_norm": 0.650438129901886, "learning_rate": 7.006369426751593e-06, "loss": 0.4782, "step": 110 }, { "epoch": 0.21305182341650672, "grad_norm": 0.5463731288909912, "learning_rate": 7.070063694267516e-06, "loss": 0.4517, "step": 111 }, { "epoch": 0.21497120921305182, "grad_norm": 0.7589491605758667, "learning_rate": 7.13375796178344e-06, "loss": 0.4472, "step": 112 }, { "epoch": 0.21689059500959693, "grad_norm": 0.7191969752311707, "learning_rate": 7.197452229299363e-06, "loss": 0.4527, "step": 113 }, { "epoch": 0.21880998080614203, "grad_norm": 0.585182785987854, "learning_rate": 7.261146496815287e-06, "loss": 0.4615, "step": 114 }, { "epoch": 0.22072936660268713, "grad_norm": 0.7084754705429077, "learning_rate": 7.32484076433121e-06, "loss": 0.4552, "step": 115 }, { "epoch": 0.22264875239923224, "grad_norm": 0.8056797385215759, "learning_rate": 7.388535031847134e-06, "loss": 0.4657, "step": 116 }, { "epoch": 0.22456813819577734, "grad_norm": 0.615352213382721, "learning_rate": 7.452229299363057e-06, "loss": 0.4609, "step": 117 }, { "epoch": 0.22648752399232247, "grad_norm": 0.7057873010635376, "learning_rate": 7.515923566878982e-06, "loss": 0.4526, "step": 118 }, { "epoch": 0.22840690978886757, "grad_norm": 0.8460391759872437, "learning_rate": 7.579617834394906e-06, "loss": 0.4451, "step": 119 }, { "epoch": 0.23032629558541268, "grad_norm": 0.673334002494812, "learning_rate": 7.64331210191083e-06, "loss": 0.4706, "step": 120 }, { "epoch": 0.23224568138195778, "grad_norm": 0.7368304133415222, "learning_rate": 7.707006369426753e-06, "loss": 0.4498, "step": 121 }, { "epoch": 0.23416506717850288, "grad_norm": 0.732180118560791, "learning_rate": 7.770700636942676e-06, "loss": 0.4568, "step": 122 }, { "epoch": 0.236084452975048, "grad_norm": 0.6829201579093933, "learning_rate": 7.8343949044586e-06, "loss": 0.439, "step": 123 }, { "epoch": 0.2380038387715931, "grad_norm": 0.7564799785614014, "learning_rate": 7.898089171974524e-06, "loss": 0.4535, "step": 124 }, { "epoch": 0.2399232245681382, "grad_norm": 0.83155757188797, "learning_rate": 7.961783439490447e-06, "loss": 0.4469, "step": 125 }, { "epoch": 0.2418426103646833, "grad_norm": 0.7301363945007324, "learning_rate": 8.02547770700637e-06, "loss": 0.4564, "step": 126 }, { "epoch": 0.2437619961612284, "grad_norm": 0.690832793712616, "learning_rate": 8.089171974522295e-06, "loss": 0.4542, "step": 127 }, { "epoch": 0.2456813819577735, "grad_norm": 0.6709951162338257, "learning_rate": 8.152866242038218e-06, "loss": 0.4408, "step": 128 }, { "epoch": 0.2476007677543186, "grad_norm": 0.7975384593009949, "learning_rate": 8.21656050955414e-06, "loss": 0.4575, "step": 129 }, { "epoch": 0.2495201535508637, "grad_norm": 0.7785365581512451, "learning_rate": 8.280254777070064e-06, "loss": 0.4393, "step": 130 }, { "epoch": 0.2514395393474088, "grad_norm": 0.6932369470596313, "learning_rate": 8.343949044585989e-06, "loss": 0.474, "step": 131 }, { "epoch": 0.2533589251439539, "grad_norm": 0.7201568484306335, "learning_rate": 8.407643312101912e-06, "loss": 0.4666, "step": 132 }, { "epoch": 0.255278310940499, "grad_norm": 0.6742739081382751, "learning_rate": 8.471337579617835e-06, "loss": 0.4725, "step": 133 }, { "epoch": 0.2571976967370441, "grad_norm": 0.6920982599258423, "learning_rate": 8.53503184713376e-06, "loss": 0.446, "step": 134 }, { "epoch": 0.2591170825335892, "grad_norm": 0.6603702306747437, "learning_rate": 8.598726114649683e-06, "loss": 0.4586, "step": 135 }, { "epoch": 0.26103646833013433, "grad_norm": 0.675682544708252, "learning_rate": 8.662420382165606e-06, "loss": 0.4762, "step": 136 }, { "epoch": 0.2629558541266795, "grad_norm": 0.5842979550361633, "learning_rate": 8.726114649681529e-06, "loss": 0.4214, "step": 137 }, { "epoch": 0.2648752399232246, "grad_norm": 0.5904569625854492, "learning_rate": 8.789808917197454e-06, "loss": 0.4512, "step": 138 }, { "epoch": 0.2667946257197697, "grad_norm": 0.7376220226287842, "learning_rate": 8.853503184713377e-06, "loss": 0.446, "step": 139 }, { "epoch": 0.2687140115163148, "grad_norm": 0.7589370012283325, "learning_rate": 8.9171974522293e-06, "loss": 0.4596, "step": 140 }, { "epoch": 0.2706333973128599, "grad_norm": 0.6940802931785583, "learning_rate": 8.980891719745225e-06, "loss": 0.467, "step": 141 }, { "epoch": 0.272552783109405, "grad_norm": 0.7665092945098877, "learning_rate": 9.044585987261148e-06, "loss": 0.46, "step": 142 }, { "epoch": 0.2744721689059501, "grad_norm": 0.7572119235992432, "learning_rate": 9.10828025477707e-06, "loss": 0.4443, "step": 143 }, { "epoch": 0.2763915547024952, "grad_norm": 0.8288101553916931, "learning_rate": 9.171974522292994e-06, "loss": 0.4367, "step": 144 }, { "epoch": 0.2783109404990403, "grad_norm": 0.6517613530158997, "learning_rate": 9.235668789808919e-06, "loss": 0.4466, "step": 145 }, { "epoch": 0.2802303262955854, "grad_norm": 0.7318578362464905, "learning_rate": 9.299363057324842e-06, "loss": 0.4408, "step": 146 }, { "epoch": 0.2821497120921305, "grad_norm": 0.7611749768257141, "learning_rate": 9.363057324840765e-06, "loss": 0.4728, "step": 147 }, { "epoch": 0.2840690978886756, "grad_norm": 0.5942909121513367, "learning_rate": 9.426751592356688e-06, "loss": 0.4645, "step": 148 }, { "epoch": 0.28598848368522073, "grad_norm": 0.8540540933609009, "learning_rate": 9.490445859872613e-06, "loss": 0.4617, "step": 149 }, { "epoch": 0.28790786948176583, "grad_norm": 0.7360031604766846, "learning_rate": 9.554140127388536e-06, "loss": 0.4452, "step": 150 }, { "epoch": 0.28982725527831094, "grad_norm": 0.701553225517273, "learning_rate": 9.617834394904459e-06, "loss": 0.4706, "step": 151 }, { "epoch": 0.29174664107485604, "grad_norm": 0.6596494317054749, "learning_rate": 9.681528662420384e-06, "loss": 0.4439, "step": 152 }, { "epoch": 0.29366602687140114, "grad_norm": 0.7518951296806335, "learning_rate": 9.745222929936307e-06, "loss": 0.4582, "step": 153 }, { "epoch": 0.29558541266794625, "grad_norm": 0.618813157081604, "learning_rate": 9.80891719745223e-06, "loss": 0.4519, "step": 154 }, { "epoch": 0.29750479846449135, "grad_norm": 0.7260032296180725, "learning_rate": 9.872611464968153e-06, "loss": 0.4453, "step": 155 }, { "epoch": 0.29942418426103645, "grad_norm": 0.698022723197937, "learning_rate": 9.936305732484078e-06, "loss": 0.4706, "step": 156 }, { "epoch": 0.30134357005758156, "grad_norm": 0.7160188555717468, "learning_rate": 1e-05, "loss": 0.4385, "step": 157 }, { "epoch": 0.30326295585412666, "grad_norm": 0.6388968229293823, "learning_rate": 9.999987518438183e-06, "loss": 0.4322, "step": 158 }, { "epoch": 0.30518234165067176, "grad_norm": 0.6440721750259399, "learning_rate": 9.999950073815046e-06, "loss": 0.4374, "step": 159 }, { "epoch": 0.30710172744721687, "grad_norm": 0.890856146812439, "learning_rate": 9.999887666317538e-06, "loss": 0.4464, "step": 160 }, { "epoch": 0.30902111324376197, "grad_norm": 0.6834772229194641, "learning_rate": 9.999800296257234e-06, "loss": 0.4481, "step": 161 }, { "epoch": 0.31094049904030713, "grad_norm": 0.6732379198074341, "learning_rate": 9.99968796407034e-06, "loss": 0.4301, "step": 162 }, { "epoch": 0.31285988483685223, "grad_norm": 0.6915388703346252, "learning_rate": 9.99955067031769e-06, "loss": 0.4564, "step": 163 }, { "epoch": 0.31477927063339733, "grad_norm": 0.7537623643875122, "learning_rate": 9.99938841568474e-06, "loss": 0.4568, "step": 164 }, { "epoch": 0.31669865642994244, "grad_norm": 0.6741034388542175, "learning_rate": 9.999201200981566e-06, "loss": 0.4427, "step": 165 }, { "epoch": 0.31861804222648754, "grad_norm": 0.7043302655220032, "learning_rate": 9.998989027142861e-06, "loss": 0.4556, "step": 166 }, { "epoch": 0.32053742802303264, "grad_norm": 0.6527972221374512, "learning_rate": 9.998751895227927e-06, "loss": 0.4515, "step": 167 }, { "epoch": 0.32245681381957775, "grad_norm": 0.7115075588226318, "learning_rate": 9.99848980642068e-06, "loss": 0.4343, "step": 168 }, { "epoch": 0.32437619961612285, "grad_norm": 0.6771368384361267, "learning_rate": 9.998202762029626e-06, "loss": 0.4369, "step": 169 }, { "epoch": 0.32629558541266795, "grad_norm": 0.644873321056366, "learning_rate": 9.997890763487869e-06, "loss": 0.43, "step": 170 }, { "epoch": 0.32821497120921306, "grad_norm": 0.6961153149604797, "learning_rate": 9.997553812353106e-06, "loss": 0.4505, "step": 171 }, { "epoch": 0.33013435700575816, "grad_norm": 0.6462439894676208, "learning_rate": 9.997191910307606e-06, "loss": 0.4492, "step": 172 }, { "epoch": 0.33205374280230326, "grad_norm": 0.7376931309700012, "learning_rate": 9.996805059158208e-06, "loss": 0.4795, "step": 173 }, { "epoch": 0.33397312859884837, "grad_norm": 0.6237533688545227, "learning_rate": 9.996393260836317e-06, "loss": 0.4411, "step": 174 }, { "epoch": 0.33589251439539347, "grad_norm": 0.6778257489204407, "learning_rate": 9.995956517397884e-06, "loss": 0.4498, "step": 175 }, { "epoch": 0.3378119001919386, "grad_norm": 0.6935009956359863, "learning_rate": 9.99549483102341e-06, "loss": 0.4487, "step": 176 }, { "epoch": 0.3397312859884837, "grad_norm": 0.5927076935768127, "learning_rate": 9.995008204017914e-06, "loss": 0.4481, "step": 177 }, { "epoch": 0.3416506717850288, "grad_norm": 0.7267325520515442, "learning_rate": 9.99449663881095e-06, "loss": 0.4297, "step": 178 }, { "epoch": 0.3435700575815739, "grad_norm": 0.666922390460968, "learning_rate": 9.99396013795657e-06, "loss": 0.4314, "step": 179 }, { "epoch": 0.345489443378119, "grad_norm": 0.6052606105804443, "learning_rate": 9.993398704133318e-06, "loss": 0.4699, "step": 180 }, { "epoch": 0.3474088291746641, "grad_norm": 0.6971169114112854, "learning_rate": 9.992812340144225e-06, "loss": 0.4399, "step": 181 }, { "epoch": 0.3493282149712092, "grad_norm": 0.7240936160087585, "learning_rate": 9.992201048916783e-06, "loss": 0.4464, "step": 182 }, { "epoch": 0.3512476007677543, "grad_norm": 0.6329814791679382, "learning_rate": 9.991564833502944e-06, "loss": 0.4377, "step": 183 }, { "epoch": 0.3531669865642994, "grad_norm": 0.7257843613624573, "learning_rate": 9.99090369707909e-06, "loss": 0.4438, "step": 184 }, { "epoch": 0.3550863723608445, "grad_norm": 0.7550468444824219, "learning_rate": 9.990217642946028e-06, "loss": 0.4507, "step": 185 }, { "epoch": 0.3570057581573896, "grad_norm": 0.6751057505607605, "learning_rate": 9.989506674528968e-06, "loss": 0.4526, "step": 186 }, { "epoch": 0.35892514395393477, "grad_norm": 0.6514175534248352, "learning_rate": 9.988770795377512e-06, "loss": 0.4471, "step": 187 }, { "epoch": 0.36084452975047987, "grad_norm": 0.6558511853218079, "learning_rate": 9.988010009165622e-06, "loss": 0.4585, "step": 188 }, { "epoch": 0.362763915547025, "grad_norm": 0.7468337416648865, "learning_rate": 9.987224319691624e-06, "loss": 0.4516, "step": 189 }, { "epoch": 0.3646833013435701, "grad_norm": 0.6295021176338196, "learning_rate": 9.986413730878168e-06, "loss": 0.459, "step": 190 }, { "epoch": 0.3666026871401152, "grad_norm": 0.7058722972869873, "learning_rate": 9.98557824677222e-06, "loss": 0.4287, "step": 191 }, { "epoch": 0.3685220729366603, "grad_norm": 0.6618736982345581, "learning_rate": 9.984717871545038e-06, "loss": 0.4624, "step": 192 }, { "epoch": 0.3704414587332054, "grad_norm": 0.6650713086128235, "learning_rate": 9.983832609492154e-06, "loss": 0.458, "step": 193 }, { "epoch": 0.3723608445297505, "grad_norm": 0.6792466044425964, "learning_rate": 9.98292246503335e-06, "loss": 0.4426, "step": 194 }, { "epoch": 0.3742802303262956, "grad_norm": 0.6971078515052795, "learning_rate": 9.981987442712634e-06, "loss": 0.4343, "step": 195 }, { "epoch": 0.3761996161228407, "grad_norm": 0.6061384677886963, "learning_rate": 9.981027547198221e-06, "loss": 0.4126, "step": 196 }, { "epoch": 0.3781190019193858, "grad_norm": 0.654990553855896, "learning_rate": 9.98004278328251e-06, "loss": 0.4378, "step": 197 }, { "epoch": 0.3800383877159309, "grad_norm": 0.6204733848571777, "learning_rate": 9.979033155882058e-06, "loss": 0.4325, "step": 198 }, { "epoch": 0.381957773512476, "grad_norm": 0.7164086699485779, "learning_rate": 9.977998670037554e-06, "loss": 0.4552, "step": 199 }, { "epoch": 0.3838771593090211, "grad_norm": 0.5952450633049011, "learning_rate": 9.976939330913801e-06, "loss": 0.4519, "step": 200 }, { "epoch": 0.3857965451055662, "grad_norm": 0.6936164498329163, "learning_rate": 9.97585514379968e-06, "loss": 0.4455, "step": 201 }, { "epoch": 0.3877159309021113, "grad_norm": 0.8349876999855042, "learning_rate": 9.974746114108129e-06, "loss": 0.4286, "step": 202 }, { "epoch": 0.3896353166986564, "grad_norm": 0.6776987910270691, "learning_rate": 9.973612247376118e-06, "loss": 0.4408, "step": 203 }, { "epoch": 0.3915547024952015, "grad_norm": 0.6398627161979675, "learning_rate": 9.972453549264618e-06, "loss": 0.425, "step": 204 }, { "epoch": 0.3934740882917466, "grad_norm": 0.6982352137565613, "learning_rate": 9.971270025558576e-06, "loss": 0.4527, "step": 205 }, { "epoch": 0.39539347408829173, "grad_norm": 0.5982198119163513, "learning_rate": 9.970061682166878e-06, "loss": 0.4486, "step": 206 }, { "epoch": 0.39731285988483683, "grad_norm": 0.82996666431427, "learning_rate": 9.968828525122331e-06, "loss": 0.4366, "step": 207 }, { "epoch": 0.39923224568138194, "grad_norm": 0.6294369697570801, "learning_rate": 9.967570560581625e-06, "loss": 0.4653, "step": 208 }, { "epoch": 0.40115163147792704, "grad_norm": 0.7105359435081482, "learning_rate": 9.966287794825305e-06, "loss": 0.4388, "step": 209 }, { "epoch": 0.40307101727447214, "grad_norm": 0.7270756363868713, "learning_rate": 9.96498023425774e-06, "loss": 0.4573, "step": 210 }, { "epoch": 0.4049904030710173, "grad_norm": 0.6125174164772034, "learning_rate": 9.963647885407088e-06, "loss": 0.4368, "step": 211 }, { "epoch": 0.4069097888675624, "grad_norm": 0.5908179879188538, "learning_rate": 9.962290754925267e-06, "loss": 0.4304, "step": 212 }, { "epoch": 0.4088291746641075, "grad_norm": 0.640978217124939, "learning_rate": 9.960908849587922e-06, "loss": 0.4332, "step": 213 }, { "epoch": 0.4107485604606526, "grad_norm": 0.7091466784477234, "learning_rate": 9.959502176294384e-06, "loss": 0.4219, "step": 214 }, { "epoch": 0.4126679462571977, "grad_norm": 0.5906879305839539, "learning_rate": 9.958070742067649e-06, "loss": 0.4093, "step": 215 }, { "epoch": 0.4145873320537428, "grad_norm": 0.6954293251037598, "learning_rate": 9.95661455405433e-06, "loss": 0.4557, "step": 216 }, { "epoch": 0.4165067178502879, "grad_norm": 0.7001457214355469, "learning_rate": 9.955133619524623e-06, "loss": 0.441, "step": 217 }, { "epoch": 0.418426103646833, "grad_norm": 0.6486403346061707, "learning_rate": 9.953627945872281e-06, "loss": 0.4367, "step": 218 }, { "epoch": 0.42034548944337813, "grad_norm": 0.7005143165588379, "learning_rate": 9.952097540614571e-06, "loss": 0.425, "step": 219 }, { "epoch": 0.42226487523992323, "grad_norm": 0.7145869731903076, "learning_rate": 9.95054241139223e-06, "loss": 0.4603, "step": 220 }, { "epoch": 0.42418426103646834, "grad_norm": 0.6767684817314148, "learning_rate": 9.948962565969431e-06, "loss": 0.4263, "step": 221 }, { "epoch": 0.42610364683301344, "grad_norm": 0.6702331900596619, "learning_rate": 9.947358012233752e-06, "loss": 0.4552, "step": 222 }, { "epoch": 0.42802303262955854, "grad_norm": 0.7195345163345337, "learning_rate": 9.945728758196129e-06, "loss": 0.4323, "step": 223 }, { "epoch": 0.42994241842610365, "grad_norm": 0.5704677104949951, "learning_rate": 9.944074811990816e-06, "loss": 0.4248, "step": 224 }, { "epoch": 0.43186180422264875, "grad_norm": 0.6638801097869873, "learning_rate": 9.942396181875342e-06, "loss": 0.4302, "step": 225 }, { "epoch": 0.43378119001919385, "grad_norm": 0.6865212917327881, "learning_rate": 9.940692876230482e-06, "loss": 0.4447, "step": 226 }, { "epoch": 0.43570057581573896, "grad_norm": 0.6293583512306213, "learning_rate": 9.938964903560198e-06, "loss": 0.437, "step": 227 }, { "epoch": 0.43761996161228406, "grad_norm": 0.7171194553375244, "learning_rate": 9.937212272491612e-06, "loss": 0.4351, "step": 228 }, { "epoch": 0.43953934740882916, "grad_norm": 0.6544391512870789, "learning_rate": 9.935434991774951e-06, "loss": 0.459, "step": 229 }, { "epoch": 0.44145873320537427, "grad_norm": 0.6353806853294373, "learning_rate": 9.933633070283512e-06, "loss": 0.4284, "step": 230 }, { "epoch": 0.44337811900191937, "grad_norm": 0.6007708311080933, "learning_rate": 9.931806517013612e-06, "loss": 0.4208, "step": 231 }, { "epoch": 0.44529750479846447, "grad_norm": 0.7037277817726135, "learning_rate": 9.929955341084547e-06, "loss": 0.4181, "step": 232 }, { "epoch": 0.4472168905950096, "grad_norm": 0.63409823179245, "learning_rate": 9.928079551738542e-06, "loss": 0.4368, "step": 233 }, { "epoch": 0.4491362763915547, "grad_norm": 0.6313980221748352, "learning_rate": 9.926179158340711e-06, "loss": 0.414, "step": 234 }, { "epoch": 0.4510556621880998, "grad_norm": 0.7628433108329773, "learning_rate": 9.924254170379007e-06, "loss": 0.4274, "step": 235 }, { "epoch": 0.45297504798464494, "grad_norm": 0.6051083207130432, "learning_rate": 9.922304597464167e-06, "loss": 0.4312, "step": 236 }, { "epoch": 0.45489443378119004, "grad_norm": 0.7419398427009583, "learning_rate": 9.92033044932968e-06, "loss": 0.4231, "step": 237 }, { "epoch": 0.45681381957773515, "grad_norm": 0.8991482853889465, "learning_rate": 9.918331735831727e-06, "loss": 0.4245, "step": 238 }, { "epoch": 0.45873320537428025, "grad_norm": 0.6546522974967957, "learning_rate": 9.916308466949134e-06, "loss": 0.4359, "step": 239 }, { "epoch": 0.46065259117082535, "grad_norm": 0.7103986144065857, "learning_rate": 9.914260652783323e-06, "loss": 0.4539, "step": 240 }, { "epoch": 0.46257197696737046, "grad_norm": 0.78874671459198, "learning_rate": 9.912188303558263e-06, "loss": 0.4336, "step": 241 }, { "epoch": 0.46449136276391556, "grad_norm": 0.7283755540847778, "learning_rate": 9.910091429620414e-06, "loss": 0.4112, "step": 242 }, { "epoch": 0.46641074856046066, "grad_norm": 0.6519036293029785, "learning_rate": 9.907970041438683e-06, "loss": 0.421, "step": 243 }, { "epoch": 0.46833013435700577, "grad_norm": 0.7197872400283813, "learning_rate": 9.905824149604363e-06, "loss": 0.4379, "step": 244 }, { "epoch": 0.47024952015355087, "grad_norm": 0.7136171460151672, "learning_rate": 9.903653764831088e-06, "loss": 0.4301, "step": 245 }, { "epoch": 0.472168905950096, "grad_norm": 0.6281402111053467, "learning_rate": 9.901458897954772e-06, "loss": 0.4494, "step": 246 }, { "epoch": 0.4740882917466411, "grad_norm": 0.682167112827301, "learning_rate": 9.899239559933566e-06, "loss": 0.43, "step": 247 }, { "epoch": 0.4760076775431862, "grad_norm": 0.6270762085914612, "learning_rate": 9.896995761847789e-06, "loss": 0.4068, "step": 248 }, { "epoch": 0.4779270633397313, "grad_norm": 0.7494179010391235, "learning_rate": 9.894727514899883e-06, "loss": 0.4318, "step": 249 }, { "epoch": 0.4798464491362764, "grad_norm": 0.6733092069625854, "learning_rate": 9.892434830414354e-06, "loss": 0.4283, "step": 250 }, { "epoch": 0.4817658349328215, "grad_norm": 0.6268540024757385, "learning_rate": 9.890117719837716e-06, "loss": 0.4645, "step": 251 }, { "epoch": 0.4836852207293666, "grad_norm": 0.8000752329826355, "learning_rate": 9.887776194738433e-06, "loss": 0.4272, "step": 252 }, { "epoch": 0.4856046065259117, "grad_norm": 0.8790246844291687, "learning_rate": 9.885410266806858e-06, "loss": 0.4544, "step": 253 }, { "epoch": 0.4875239923224568, "grad_norm": 0.7149198651313782, "learning_rate": 9.883019947855183e-06, "loss": 0.4629, "step": 254 }, { "epoch": 0.4894433781190019, "grad_norm": 0.853471577167511, "learning_rate": 9.880605249817377e-06, "loss": 0.4511, "step": 255 }, { "epoch": 0.491362763915547, "grad_norm": 0.6769025921821594, "learning_rate": 9.878166184749116e-06, "loss": 0.4034, "step": 256 }, { "epoch": 0.4932821497120921, "grad_norm": 0.6373630166053772, "learning_rate": 9.875702764827739e-06, "loss": 0.4276, "step": 257 }, { "epoch": 0.4952015355086372, "grad_norm": 0.7150445580482483, "learning_rate": 9.873215002352177e-06, "loss": 0.4135, "step": 258 }, { "epoch": 0.4971209213051823, "grad_norm": 0.7217190861701965, "learning_rate": 9.870702909742893e-06, "loss": 0.4441, "step": 259 }, { "epoch": 0.4990403071017274, "grad_norm": 0.6131094098091125, "learning_rate": 9.868166499541824e-06, "loss": 0.4395, "step": 260 }, { "epoch": 0.5009596928982726, "grad_norm": 0.7494925260543823, "learning_rate": 9.865605784412316e-06, "loss": 0.4244, "step": 261 }, { "epoch": 0.5028790786948176, "grad_norm": 0.6605070233345032, "learning_rate": 9.863020777139056e-06, "loss": 0.4513, "step": 262 }, { "epoch": 0.5047984644913628, "grad_norm": 0.5873823165893555, "learning_rate": 9.860411490628017e-06, "loss": 0.4255, "step": 263 }, { "epoch": 0.5067178502879078, "grad_norm": 0.8924071788787842, "learning_rate": 9.857777937906385e-06, "loss": 0.4319, "step": 264 }, { "epoch": 0.508637236084453, "grad_norm": 0.6528257131576538, "learning_rate": 9.855120132122503e-06, "loss": 0.4346, "step": 265 }, { "epoch": 0.510556621880998, "grad_norm": 0.7923728227615356, "learning_rate": 9.852438086545798e-06, "loss": 0.4529, "step": 266 }, { "epoch": 0.5124760076775432, "grad_norm": 0.709830105304718, "learning_rate": 9.849731814566713e-06, "loss": 0.4339, "step": 267 }, { "epoch": 0.5143953934740882, "grad_norm": 0.7254382967948914, "learning_rate": 9.847001329696653e-06, "loss": 0.4399, "step": 268 }, { "epoch": 0.5163147792706334, "grad_norm": 0.7680420279502869, "learning_rate": 9.844246645567903e-06, "loss": 0.4364, "step": 269 }, { "epoch": 0.5182341650671785, "grad_norm": 0.7227919697761536, "learning_rate": 9.841467775933566e-06, "loss": 0.4689, "step": 270 }, { "epoch": 0.5201535508637236, "grad_norm": 0.783503532409668, "learning_rate": 9.838664734667496e-06, "loss": 0.4192, "step": 271 }, { "epoch": 0.5220729366602687, "grad_norm": 0.6857229471206665, "learning_rate": 9.835837535764226e-06, "loss": 0.4116, "step": 272 }, { "epoch": 0.5239923224568138, "grad_norm": 0.5939391851425171, "learning_rate": 9.832986193338898e-06, "loss": 0.4632, "step": 273 }, { "epoch": 0.525911708253359, "grad_norm": 0.630614697933197, "learning_rate": 9.830110721627197e-06, "loss": 0.4271, "step": 274 }, { "epoch": 0.527831094049904, "grad_norm": 0.7544052004814148, "learning_rate": 9.827211134985273e-06, "loss": 0.4276, "step": 275 }, { "epoch": 0.5297504798464492, "grad_norm": 0.637011706829071, "learning_rate": 9.824287447889675e-06, "loss": 0.4166, "step": 276 }, { "epoch": 0.5316698656429942, "grad_norm": 0.6583498120307922, "learning_rate": 9.821339674937274e-06, "loss": 0.4165, "step": 277 }, { "epoch": 0.5335892514395394, "grad_norm": 0.6649726033210754, "learning_rate": 9.818367830845193e-06, "loss": 0.4441, "step": 278 }, { "epoch": 0.5355086372360844, "grad_norm": 0.6893701553344727, "learning_rate": 9.815371930450737e-06, "loss": 0.4405, "step": 279 }, { "epoch": 0.5374280230326296, "grad_norm": 0.7199766039848328, "learning_rate": 9.812351988711312e-06, "loss": 0.4362, "step": 280 }, { "epoch": 0.5393474088291746, "grad_norm": 0.6347528100013733, "learning_rate": 9.809308020704353e-06, "loss": 0.4234, "step": 281 }, { "epoch": 0.5412667946257198, "grad_norm": 0.7851823568344116, "learning_rate": 9.80624004162725e-06, "loss": 0.4674, "step": 282 }, { "epoch": 0.5431861804222649, "grad_norm": 0.7139828205108643, "learning_rate": 9.80314806679727e-06, "loss": 0.4116, "step": 283 }, { "epoch": 0.54510556621881, "grad_norm": 0.5306137204170227, "learning_rate": 9.800032111651486e-06, "loss": 0.4379, "step": 284 }, { "epoch": 0.5470249520153551, "grad_norm": 0.5984044671058655, "learning_rate": 9.79689219174669e-06, "loss": 0.4189, "step": 285 }, { "epoch": 0.5489443378119002, "grad_norm": 0.7445034384727478, "learning_rate": 9.793728322759327e-06, "loss": 0.4311, "step": 286 }, { "epoch": 0.5508637236084453, "grad_norm": 0.5725110173225403, "learning_rate": 9.790540520485402e-06, "loss": 0.4138, "step": 287 }, { "epoch": 0.5527831094049904, "grad_norm": 0.6558986306190491, "learning_rate": 9.78732880084042e-06, "loss": 0.4236, "step": 288 }, { "epoch": 0.5547024952015355, "grad_norm": 0.7439014315605164, "learning_rate": 9.78409317985929e-06, "loss": 0.431, "step": 289 }, { "epoch": 0.5566218809980806, "grad_norm": 0.6103881597518921, "learning_rate": 9.780833673696255e-06, "loss": 0.4102, "step": 290 }, { "epoch": 0.5585412667946257, "grad_norm": 0.5386815071105957, "learning_rate": 9.777550298624805e-06, "loss": 0.4031, "step": 291 }, { "epoch": 0.5604606525911708, "grad_norm": 0.6646689772605896, "learning_rate": 9.774243071037599e-06, "loss": 0.4328, "step": 292 }, { "epoch": 0.5623800383877159, "grad_norm": 0.6359447836875916, "learning_rate": 9.770912007446385e-06, "loss": 0.4319, "step": 293 }, { "epoch": 0.564299424184261, "grad_norm": 0.6127658486366272, "learning_rate": 9.767557124481912e-06, "loss": 0.432, "step": 294 }, { "epoch": 0.5662188099808061, "grad_norm": 0.5677043795585632, "learning_rate": 9.76417843889385e-06, "loss": 0.442, "step": 295 }, { "epoch": 0.5681381957773513, "grad_norm": 0.6375153064727783, "learning_rate": 9.760775967550712e-06, "loss": 0.4154, "step": 296 }, { "epoch": 0.5700575815738963, "grad_norm": 0.6346372961997986, "learning_rate": 9.757349727439759e-06, "loss": 0.4409, "step": 297 }, { "epoch": 0.5719769673704415, "grad_norm": 0.6231582760810852, "learning_rate": 9.753899735666921e-06, "loss": 0.428, "step": 298 }, { "epoch": 0.5738963531669866, "grad_norm": 0.6486530900001526, "learning_rate": 9.750426009456713e-06, "loss": 0.4425, "step": 299 }, { "epoch": 0.5758157389635317, "grad_norm": 0.7415163516998291, "learning_rate": 9.746928566152148e-06, "loss": 0.4222, "step": 300 }, { "epoch": 0.5777351247600768, "grad_norm": 0.6897903084754944, "learning_rate": 9.743407423214643e-06, "loss": 0.4412, "step": 301 }, { "epoch": 0.5796545105566219, "grad_norm": 0.6374213695526123, "learning_rate": 9.739862598223948e-06, "loss": 0.4001, "step": 302 }, { "epoch": 0.581573896353167, "grad_norm": 0.7497037649154663, "learning_rate": 9.736294108878044e-06, "loss": 0.4258, "step": 303 }, { "epoch": 0.5834932821497121, "grad_norm": 0.7362764477729797, "learning_rate": 9.732701972993057e-06, "loss": 0.44, "step": 304 }, { "epoch": 0.5854126679462572, "grad_norm": 0.5571125745773315, "learning_rate": 9.729086208503174e-06, "loss": 0.4179, "step": 305 }, { "epoch": 0.5873320537428023, "grad_norm": 0.7091711759567261, "learning_rate": 9.72544683346055e-06, "loss": 0.4135, "step": 306 }, { "epoch": 0.5892514395393474, "grad_norm": 0.6148259043693542, "learning_rate": 9.72178386603522e-06, "loss": 0.4281, "step": 307 }, { "epoch": 0.5911708253358925, "grad_norm": 0.6411518454551697, "learning_rate": 9.718097324515003e-06, "loss": 0.4267, "step": 308 }, { "epoch": 0.5930902111324377, "grad_norm": 0.6688199639320374, "learning_rate": 9.714387227305422e-06, "loss": 0.4249, "step": 309 }, { "epoch": 0.5950095969289827, "grad_norm": 0.7264848351478577, "learning_rate": 9.710653592929595e-06, "loss": 0.4282, "step": 310 }, { "epoch": 0.5969289827255279, "grad_norm": 0.6971708536148071, "learning_rate": 9.70689644002816e-06, "loss": 0.4237, "step": 311 }, { "epoch": 0.5988483685220729, "grad_norm": 0.7192535996437073, "learning_rate": 9.703115787359173e-06, "loss": 0.4165, "step": 312 }, { "epoch": 0.6007677543186181, "grad_norm": 0.645057201385498, "learning_rate": 9.69931165379801e-06, "loss": 0.4475, "step": 313 }, { "epoch": 0.6026871401151631, "grad_norm": 0.6350524425506592, "learning_rate": 9.695484058337285e-06, "loss": 0.432, "step": 314 }, { "epoch": 0.6046065259117083, "grad_norm": 0.7150298953056335, "learning_rate": 9.691633020086745e-06, "loss": 0.4275, "step": 315 }, { "epoch": 0.6065259117082533, "grad_norm": 0.6280755400657654, "learning_rate": 9.687758558273179e-06, "loss": 0.4436, "step": 316 }, { "epoch": 0.6084452975047985, "grad_norm": 0.6668034791946411, "learning_rate": 9.683860692240322e-06, "loss": 0.4348, "step": 317 }, { "epoch": 0.6103646833013435, "grad_norm": 0.8149353265762329, "learning_rate": 9.679939441448754e-06, "loss": 0.441, "step": 318 }, { "epoch": 0.6122840690978887, "grad_norm": 0.7052530646324158, "learning_rate": 9.67599482547581e-06, "loss": 0.4274, "step": 319 }, { "epoch": 0.6142034548944337, "grad_norm": 0.6295123100280762, "learning_rate": 9.672026864015476e-06, "loss": 0.4367, "step": 320 }, { "epoch": 0.6161228406909789, "grad_norm": 0.7072158455848694, "learning_rate": 9.668035576878296e-06, "loss": 0.414, "step": 321 }, { "epoch": 0.6180422264875239, "grad_norm": 0.6555576324462891, "learning_rate": 9.664020983991269e-06, "loss": 0.4215, "step": 322 }, { "epoch": 0.6199616122840691, "grad_norm": 0.6977773904800415, "learning_rate": 9.65998310539775e-06, "loss": 0.4425, "step": 323 }, { "epoch": 0.6218809980806143, "grad_norm": 0.5918757915496826, "learning_rate": 9.65592196125735e-06, "loss": 0.4152, "step": 324 }, { "epoch": 0.6238003838771593, "grad_norm": 0.5508590340614319, "learning_rate": 9.651837571845842e-06, "loss": 0.402, "step": 325 }, { "epoch": 0.6257197696737045, "grad_norm": 0.7681599259376526, "learning_rate": 9.647729957555045e-06, "loss": 0.4233, "step": 326 }, { "epoch": 0.6276391554702495, "grad_norm": 0.6449990272521973, "learning_rate": 9.643599138892737e-06, "loss": 0.4527, "step": 327 }, { "epoch": 0.6295585412667947, "grad_norm": 0.6518946886062622, "learning_rate": 9.639445136482549e-06, "loss": 0.4065, "step": 328 }, { "epoch": 0.6314779270633397, "grad_norm": 0.668444037437439, "learning_rate": 9.635267971063848e-06, "loss": 0.4133, "step": 329 }, { "epoch": 0.6333973128598849, "grad_norm": 0.5561951994895935, "learning_rate": 9.631067663491663e-06, "loss": 0.4469, "step": 330 }, { "epoch": 0.6353166986564299, "grad_norm": 0.6482459902763367, "learning_rate": 9.626844234736546e-06, "loss": 0.4355, "step": 331 }, { "epoch": 0.6372360844529751, "grad_norm": 0.6004785299301147, "learning_rate": 9.622597705884497e-06, "loss": 0.4161, "step": 332 }, { "epoch": 0.6391554702495201, "grad_norm": 0.6640856862068176, "learning_rate": 9.618328098136838e-06, "loss": 0.4105, "step": 333 }, { "epoch": 0.6410748560460653, "grad_norm": 0.6697112321853638, "learning_rate": 9.614035432810116e-06, "loss": 0.4432, "step": 334 }, { "epoch": 0.6429942418426103, "grad_norm": 0.538751482963562, "learning_rate": 9.609719731336005e-06, "loss": 0.3972, "step": 335 }, { "epoch": 0.6449136276391555, "grad_norm": 0.7190961241722107, "learning_rate": 9.605381015261176e-06, "loss": 0.4086, "step": 336 }, { "epoch": 0.6468330134357005, "grad_norm": 0.5760567784309387, "learning_rate": 9.601019306247214e-06, "loss": 0.4402, "step": 337 }, { "epoch": 0.6487523992322457, "grad_norm": 0.6708976030349731, "learning_rate": 9.596634626070495e-06, "loss": 0.4376, "step": 338 }, { "epoch": 0.6506717850287908, "grad_norm": 0.5737307667732239, "learning_rate": 9.59222699662208e-06, "loss": 0.4184, "step": 339 }, { "epoch": 0.6525911708253359, "grad_norm": 0.7131927013397217, "learning_rate": 9.587796439907609e-06, "loss": 0.4216, "step": 340 }, { "epoch": 0.654510556621881, "grad_norm": 0.6637858748435974, "learning_rate": 9.58334297804719e-06, "loss": 0.4156, "step": 341 }, { "epoch": 0.6564299424184261, "grad_norm": 0.7475433945655823, "learning_rate": 9.578866633275289e-06, "loss": 0.4354, "step": 342 }, { "epoch": 0.6583493282149712, "grad_norm": 0.7131420969963074, "learning_rate": 9.574367427940609e-06, "loss": 0.4073, "step": 343 }, { "epoch": 0.6602687140115163, "grad_norm": 0.7612800598144531, "learning_rate": 9.569845384506001e-06, "loss": 0.3966, "step": 344 }, { "epoch": 0.6621880998080614, "grad_norm": 0.6027848124504089, "learning_rate": 9.565300525548327e-06, "loss": 0.4122, "step": 345 }, { "epoch": 0.6641074856046065, "grad_norm": 0.7464834451675415, "learning_rate": 9.560732873758362e-06, "loss": 0.4241, "step": 346 }, { "epoch": 0.6660268714011516, "grad_norm": 0.6883158683776855, "learning_rate": 9.55614245194068e-06, "loss": 0.4527, "step": 347 }, { "epoch": 0.6679462571976967, "grad_norm": 0.6495918035507202, "learning_rate": 9.551529283013531e-06, "loss": 0.4084, "step": 348 }, { "epoch": 0.6698656429942419, "grad_norm": 0.6851832270622253, "learning_rate": 9.546893390008737e-06, "loss": 0.4179, "step": 349 }, { "epoch": 0.6717850287907869, "grad_norm": 0.755043089389801, "learning_rate": 9.542234796071577e-06, "loss": 0.4407, "step": 350 }, { "epoch": 0.6737044145873321, "grad_norm": 0.7465824484825134, "learning_rate": 9.537553524460656e-06, "loss": 0.4051, "step": 351 }, { "epoch": 0.6756238003838771, "grad_norm": 0.7629840970039368, "learning_rate": 9.53284959854781e-06, "loss": 0.436, "step": 352 }, { "epoch": 0.6775431861804223, "grad_norm": 0.7614500522613525, "learning_rate": 9.528123041817972e-06, "loss": 0.4268, "step": 353 }, { "epoch": 0.6794625719769674, "grad_norm": 0.7468124628067017, "learning_rate": 9.523373877869069e-06, "loss": 0.4327, "step": 354 }, { "epoch": 0.6813819577735125, "grad_norm": 0.6427062749862671, "learning_rate": 9.518602130411894e-06, "loss": 0.4249, "step": 355 }, { "epoch": 0.6833013435700576, "grad_norm": 0.5504583120346069, "learning_rate": 9.513807823269991e-06, "loss": 0.4093, "step": 356 }, { "epoch": 0.6852207293666027, "grad_norm": 0.6913516521453857, "learning_rate": 9.508990980379537e-06, "loss": 0.4319, "step": 357 }, { "epoch": 0.6871401151631478, "grad_norm": 0.7237867712974548, "learning_rate": 9.504151625789223e-06, "loss": 0.44, "step": 358 }, { "epoch": 0.6890595009596929, "grad_norm": 0.5712859034538269, "learning_rate": 9.499289783660126e-06, "loss": 0.4327, "step": 359 }, { "epoch": 0.690978886756238, "grad_norm": 0.5349499583244324, "learning_rate": 9.4944054782656e-06, "loss": 0.4097, "step": 360 }, { "epoch": 0.6928982725527831, "grad_norm": 0.6881182193756104, "learning_rate": 9.489498733991151e-06, "loss": 0.4338, "step": 361 }, { "epoch": 0.6948176583493282, "grad_norm": 0.6559829115867615, "learning_rate": 9.484569575334313e-06, "loss": 0.4308, "step": 362 }, { "epoch": 0.6967370441458733, "grad_norm": 0.6469511389732361, "learning_rate": 9.47961802690452e-06, "loss": 0.4036, "step": 363 }, { "epoch": 0.6986564299424184, "grad_norm": 0.6052860021591187, "learning_rate": 9.474644113423e-06, "loss": 0.444, "step": 364 }, { "epoch": 0.7005758157389635, "grad_norm": 0.7958092093467712, "learning_rate": 9.469647859722634e-06, "loss": 0.4086, "step": 365 }, { "epoch": 0.7024952015355086, "grad_norm": 0.6813905835151672, "learning_rate": 9.464629290747844e-06, "loss": 0.4151, "step": 366 }, { "epoch": 0.7044145873320538, "grad_norm": 0.5990328192710876, "learning_rate": 9.459588431554458e-06, "loss": 0.423, "step": 367 }, { "epoch": 0.7063339731285988, "grad_norm": 0.6492525935173035, "learning_rate": 9.454525307309598e-06, "loss": 0.4054, "step": 368 }, { "epoch": 0.708253358925144, "grad_norm": 0.6735796332359314, "learning_rate": 9.449439943291541e-06, "loss": 0.4149, "step": 369 }, { "epoch": 0.710172744721689, "grad_norm": 0.5798818469047546, "learning_rate": 9.444332364889603e-06, "loss": 0.4175, "step": 370 }, { "epoch": 0.7120921305182342, "grad_norm": 0.608466625213623, "learning_rate": 9.439202597604004e-06, "loss": 0.4195, "step": 371 }, { "epoch": 0.7140115163147792, "grad_norm": 0.6422362327575684, "learning_rate": 9.434050667045747e-06, "loss": 0.4091, "step": 372 }, { "epoch": 0.7159309021113244, "grad_norm": 0.6456029415130615, "learning_rate": 9.42887659893649e-06, "loss": 0.4091, "step": 373 }, { "epoch": 0.7178502879078695, "grad_norm": 0.6626666188240051, "learning_rate": 9.423680419108414e-06, "loss": 0.43, "step": 374 }, { "epoch": 0.7197696737044146, "grad_norm": 0.640640139579773, "learning_rate": 9.41846215350409e-06, "loss": 0.4272, "step": 375 }, { "epoch": 0.7216890595009597, "grad_norm": 0.6671069860458374, "learning_rate": 9.413221828176365e-06, "loss": 0.4121, "step": 376 }, { "epoch": 0.7236084452975048, "grad_norm": 0.6872036457061768, "learning_rate": 9.407959469288215e-06, "loss": 0.4444, "step": 377 }, { "epoch": 0.72552783109405, "grad_norm": 0.5719563364982605, "learning_rate": 9.402675103112625e-06, "loss": 0.4461, "step": 378 }, { "epoch": 0.727447216890595, "grad_norm": 0.6912542581558228, "learning_rate": 9.397368756032445e-06, "loss": 0.445, "step": 379 }, { "epoch": 0.7293666026871402, "grad_norm": 0.6928809881210327, "learning_rate": 9.392040454540284e-06, "loss": 0.4202, "step": 380 }, { "epoch": 0.7312859884836852, "grad_norm": 0.6162976026535034, "learning_rate": 9.386690225238346e-06, "loss": 0.4023, "step": 381 }, { "epoch": 0.7332053742802304, "grad_norm": 0.5914701819419861, "learning_rate": 9.38131809483832e-06, "loss": 0.4151, "step": 382 }, { "epoch": 0.7351247600767754, "grad_norm": 0.6506879925727844, "learning_rate": 9.375924090161238e-06, "loss": 0.4226, "step": 383 }, { "epoch": 0.7370441458733206, "grad_norm": 0.6448559165000916, "learning_rate": 9.37050823813734e-06, "loss": 0.4196, "step": 384 }, { "epoch": 0.7389635316698656, "grad_norm": 0.5928444862365723, "learning_rate": 9.365070565805941e-06, "loss": 0.436, "step": 385 }, { "epoch": 0.7408829174664108, "grad_norm": 0.6238203644752502, "learning_rate": 9.359611100315302e-06, "loss": 0.408, "step": 386 }, { "epoch": 0.7428023032629558, "grad_norm": 0.7343956828117371, "learning_rate": 9.354129868922483e-06, "loss": 0.4096, "step": 387 }, { "epoch": 0.744721689059501, "grad_norm": 0.6534948348999023, "learning_rate": 9.348626898993214e-06, "loss": 0.4209, "step": 388 }, { "epoch": 0.746641074856046, "grad_norm": 0.6664964556694031, "learning_rate": 9.343102218001763e-06, "loss": 0.4303, "step": 389 }, { "epoch": 0.7485604606525912, "grad_norm": 0.5153346061706543, "learning_rate": 9.337555853530785e-06, "loss": 0.3906, "step": 390 }, { "epoch": 0.7504798464491362, "grad_norm": 0.7031687498092651, "learning_rate": 9.331987833271199e-06, "loss": 0.4031, "step": 391 }, { "epoch": 0.7523992322456814, "grad_norm": 0.6371713280677795, "learning_rate": 9.326398185022039e-06, "loss": 0.4229, "step": 392 }, { "epoch": 0.7543186180422264, "grad_norm": 0.5943268537521362, "learning_rate": 9.32078693669032e-06, "loss": 0.4371, "step": 393 }, { "epoch": 0.7562380038387716, "grad_norm": 0.6066738367080688, "learning_rate": 9.315154116290903e-06, "loss": 0.4219, "step": 394 }, { "epoch": 0.7581573896353166, "grad_norm": 0.6300312876701355, "learning_rate": 9.309499751946345e-06, "loss": 0.4308, "step": 395 }, { "epoch": 0.7600767754318618, "grad_norm": 0.6847349405288696, "learning_rate": 9.303823871886763e-06, "loss": 0.4361, "step": 396 }, { "epoch": 0.761996161228407, "grad_norm": 0.756484866142273, "learning_rate": 9.298126504449697e-06, "loss": 0.4442, "step": 397 }, { "epoch": 0.763915547024952, "grad_norm": 0.6708459258079529, "learning_rate": 9.292407678079966e-06, "loss": 0.4425, "step": 398 }, { "epoch": 0.7658349328214972, "grad_norm": 0.6701301336288452, "learning_rate": 9.286667421329523e-06, "loss": 0.431, "step": 399 }, { "epoch": 0.7677543186180422, "grad_norm": 0.6274660229682922, "learning_rate": 9.280905762857315e-06, "loss": 0.4186, "step": 400 }, { "epoch": 0.7696737044145874, "grad_norm": 0.5530282855033875, "learning_rate": 9.275122731429142e-06, "loss": 0.4009, "step": 401 }, { "epoch": 0.7715930902111324, "grad_norm": 0.6932976841926575, "learning_rate": 9.269318355917509e-06, "loss": 0.413, "step": 402 }, { "epoch": 0.7735124760076776, "grad_norm": 0.5396864414215088, "learning_rate": 9.263492665301486e-06, "loss": 0.4447, "step": 403 }, { "epoch": 0.7754318618042226, "grad_norm": 0.573872983455658, "learning_rate": 9.257645688666557e-06, "loss": 0.4286, "step": 404 }, { "epoch": 0.7773512476007678, "grad_norm": 0.7652491927146912, "learning_rate": 9.251777455204485e-06, "loss": 0.4315, "step": 405 }, { "epoch": 0.7792706333973128, "grad_norm": 0.6853858828544617, "learning_rate": 9.245887994213157e-06, "loss": 0.4274, "step": 406 }, { "epoch": 0.781190019193858, "grad_norm": 0.6263227462768555, "learning_rate": 9.239977335096439e-06, "loss": 0.4146, "step": 407 }, { "epoch": 0.783109404990403, "grad_norm": 0.6473245620727539, "learning_rate": 9.234045507364038e-06, "loss": 0.406, "step": 408 }, { "epoch": 0.7850287907869482, "grad_norm": 0.6285684704780579, "learning_rate": 9.228092540631342e-06, "loss": 0.4289, "step": 409 }, { "epoch": 0.7869481765834933, "grad_norm": 0.7281554341316223, "learning_rate": 9.222118464619278e-06, "loss": 0.4221, "step": 410 }, { "epoch": 0.7888675623800384, "grad_norm": 0.6682608723640442, "learning_rate": 9.216123309154169e-06, "loss": 0.4102, "step": 411 }, { "epoch": 0.7907869481765835, "grad_norm": 0.5621716380119324, "learning_rate": 9.210107104167572e-06, "loss": 0.4128, "step": 412 }, { "epoch": 0.7927063339731286, "grad_norm": 0.6747606992721558, "learning_rate": 9.204069879696144e-06, "loss": 0.416, "step": 413 }, { "epoch": 0.7946257197696737, "grad_norm": 0.6111229062080383, "learning_rate": 9.198011665881481e-06, "loss": 0.3827, "step": 414 }, { "epoch": 0.7965451055662188, "grad_norm": 0.5817705392837524, "learning_rate": 9.191932492969972e-06, "loss": 0.4033, "step": 415 }, { "epoch": 0.7984644913627639, "grad_norm": 0.6499997973442078, "learning_rate": 9.185832391312644e-06, "loss": 0.4271, "step": 416 }, { "epoch": 0.800383877159309, "grad_norm": 0.6348892450332642, "learning_rate": 9.179711391365015e-06, "loss": 0.4266, "step": 417 }, { "epoch": 0.8023032629558541, "grad_norm": 0.6846804618835449, "learning_rate": 9.173569523686942e-06, "loss": 0.3957, "step": 418 }, { "epoch": 0.8042226487523992, "grad_norm": 0.6703388094902039, "learning_rate": 9.167406818942468e-06, "loss": 0.4201, "step": 419 }, { "epoch": 0.8061420345489443, "grad_norm": 0.6685073375701904, "learning_rate": 9.161223307899659e-06, "loss": 0.4181, "step": 420 }, { "epoch": 0.8080614203454894, "grad_norm": 0.6837438344955444, "learning_rate": 9.155019021430469e-06, "loss": 0.4199, "step": 421 }, { "epoch": 0.8099808061420346, "grad_norm": 0.5527938604354858, "learning_rate": 9.148793990510573e-06, "loss": 0.4245, "step": 422 }, { "epoch": 0.8119001919385797, "grad_norm": 0.6045551896095276, "learning_rate": 9.142548246219212e-06, "loss": 0.4041, "step": 423 }, { "epoch": 0.8138195777351248, "grad_norm": 0.6247568726539612, "learning_rate": 9.136281819739044e-06, "loss": 0.4156, "step": 424 }, { "epoch": 0.8157389635316699, "grad_norm": 0.5868914723396301, "learning_rate": 9.129994742355985e-06, "loss": 0.4048, "step": 425 }, { "epoch": 0.817658349328215, "grad_norm": 0.6014706492424011, "learning_rate": 9.123687045459052e-06, "loss": 0.4221, "step": 426 }, { "epoch": 0.8195777351247601, "grad_norm": 0.6466706395149231, "learning_rate": 9.117358760540211e-06, "loss": 0.4312, "step": 427 }, { "epoch": 0.8214971209213052, "grad_norm": 0.5578618049621582, "learning_rate": 9.111009919194211e-06, "loss": 0.431, "step": 428 }, { "epoch": 0.8234165067178503, "grad_norm": 0.5872511863708496, "learning_rate": 9.104640553118436e-06, "loss": 0.441, "step": 429 }, { "epoch": 0.8253358925143954, "grad_norm": 0.6125566363334656, "learning_rate": 9.09825069411274e-06, "loss": 0.4232, "step": 430 }, { "epoch": 0.8272552783109405, "grad_norm": 0.6121996641159058, "learning_rate": 9.09184037407929e-06, "loss": 0.426, "step": 431 }, { "epoch": 0.8291746641074856, "grad_norm": 0.5574345588684082, "learning_rate": 9.08540962502241e-06, "loss": 0.4161, "step": 432 }, { "epoch": 0.8310940499040307, "grad_norm": 0.6503835320472717, "learning_rate": 9.078958479048419e-06, "loss": 0.4152, "step": 433 }, { "epoch": 0.8330134357005758, "grad_norm": 0.5975105166435242, "learning_rate": 9.072486968365462e-06, "loss": 0.4383, "step": 434 }, { "epoch": 0.8349328214971209, "grad_norm": 0.5361210107803345, "learning_rate": 9.065995125283367e-06, "loss": 0.4071, "step": 435 }, { "epoch": 0.836852207293666, "grad_norm": 0.6708375811576843, "learning_rate": 9.05948298221347e-06, "loss": 0.4208, "step": 436 }, { "epoch": 0.8387715930902111, "grad_norm": 0.6142942905426025, "learning_rate": 9.052950571668458e-06, "loss": 0.4188, "step": 437 }, { "epoch": 0.8406909788867563, "grad_norm": 0.6574082374572754, "learning_rate": 9.046397926262202e-06, "loss": 0.4312, "step": 438 }, { "epoch": 0.8426103646833013, "grad_norm": 0.6329575181007385, "learning_rate": 9.039825078709606e-06, "loss": 0.4099, "step": 439 }, { "epoch": 0.8445297504798465, "grad_norm": 0.6852837204933167, "learning_rate": 9.033232061826428e-06, "loss": 0.4315, "step": 440 }, { "epoch": 0.8464491362763915, "grad_norm": 0.6040037274360657, "learning_rate": 9.026618908529132e-06, "loss": 0.4117, "step": 441 }, { "epoch": 0.8483685220729367, "grad_norm": 0.6339947581291199, "learning_rate": 9.019985651834703e-06, "loss": 0.4119, "step": 442 }, { "epoch": 0.8502879078694817, "grad_norm": 0.8161340951919556, "learning_rate": 9.013332324860508e-06, "loss": 0.4314, "step": 443 }, { "epoch": 0.8522072936660269, "grad_norm": 0.6228960156440735, "learning_rate": 9.00665896082411e-06, "loss": 0.4089, "step": 444 }, { "epoch": 0.8541266794625719, "grad_norm": 0.6586893200874329, "learning_rate": 8.999965593043113e-06, "loss": 0.4375, "step": 445 }, { "epoch": 0.8560460652591171, "grad_norm": 0.7748872637748718, "learning_rate": 8.993252254934987e-06, "loss": 0.4178, "step": 446 }, { "epoch": 0.8579654510556622, "grad_norm": 0.6543998122215271, "learning_rate": 8.986518980016914e-06, "loss": 0.399, "step": 447 }, { "epoch": 0.8598848368522073, "grad_norm": 0.6150964498519897, "learning_rate": 8.979765801905604e-06, "loss": 0.4249, "step": 448 }, { "epoch": 0.8618042226487524, "grad_norm": 0.5787307024002075, "learning_rate": 8.972992754317144e-06, "loss": 0.4169, "step": 449 }, { "epoch": 0.8637236084452975, "grad_norm": 0.5482875108718872, "learning_rate": 8.96619987106682e-06, "loss": 0.4106, "step": 450 }, { "epoch": 0.8656429942418427, "grad_norm": 0.5478411912918091, "learning_rate": 8.95938718606895e-06, "loss": 0.4172, "step": 451 }, { "epoch": 0.8675623800383877, "grad_norm": 0.5522469282150269, "learning_rate": 8.952554733336706e-06, "loss": 0.425, "step": 452 }, { "epoch": 0.8694817658349329, "grad_norm": 0.5841178894042969, "learning_rate": 8.94570254698197e-06, "loss": 0.4203, "step": 453 }, { "epoch": 0.8714011516314779, "grad_norm": 0.5829973816871643, "learning_rate": 8.93883066121513e-06, "loss": 0.3909, "step": 454 }, { "epoch": 0.8733205374280231, "grad_norm": 0.5647228360176086, "learning_rate": 8.931939110344935e-06, "loss": 0.3926, "step": 455 }, { "epoch": 0.8752399232245681, "grad_norm": 0.6711538434028625, "learning_rate": 8.925027928778314e-06, "loss": 0.4281, "step": 456 }, { "epoch": 0.8771593090211133, "grad_norm": 0.6501456499099731, "learning_rate": 8.9180971510202e-06, "loss": 0.3947, "step": 457 }, { "epoch": 0.8790786948176583, "grad_norm": 0.7035219669342041, "learning_rate": 8.911146811673368e-06, "loss": 0.4267, "step": 458 }, { "epoch": 0.8809980806142035, "grad_norm": 0.6232694387435913, "learning_rate": 8.904176945438255e-06, "loss": 0.4036, "step": 459 }, { "epoch": 0.8829174664107485, "grad_norm": 0.561282217502594, "learning_rate": 8.897187587112783e-06, "loss": 0.4022, "step": 460 }, { "epoch": 0.8848368522072937, "grad_norm": 0.5832907557487488, "learning_rate": 8.890178771592198e-06, "loss": 0.3996, "step": 461 }, { "epoch": 0.8867562380038387, "grad_norm": 0.623421311378479, "learning_rate": 8.883150533868888e-06, "loss": 0.423, "step": 462 }, { "epoch": 0.8886756238003839, "grad_norm": 0.5810384154319763, "learning_rate": 8.8761029090322e-06, "loss": 0.4215, "step": 463 }, { "epoch": 0.8905950095969289, "grad_norm": 0.5524477362632751, "learning_rate": 8.869035932268285e-06, "loss": 0.4198, "step": 464 }, { "epoch": 0.8925143953934741, "grad_norm": 0.580869197845459, "learning_rate": 8.861949638859908e-06, "loss": 0.4041, "step": 465 }, { "epoch": 0.8944337811900192, "grad_norm": 0.6892798542976379, "learning_rate": 8.854844064186267e-06, "loss": 0.4238, "step": 466 }, { "epoch": 0.8963531669865643, "grad_norm": 0.5783049464225769, "learning_rate": 8.847719243722835e-06, "loss": 0.4192, "step": 467 }, { "epoch": 0.8982725527831094, "grad_norm": 0.5994821190834045, "learning_rate": 8.840575213041161e-06, "loss": 0.4531, "step": 468 }, { "epoch": 0.9001919385796545, "grad_norm": 0.6587652564048767, "learning_rate": 8.833412007808714e-06, "loss": 0.4211, "step": 469 }, { "epoch": 0.9021113243761996, "grad_norm": 0.5774714350700378, "learning_rate": 8.826229663788688e-06, "loss": 0.4289, "step": 470 }, { "epoch": 0.9040307101727447, "grad_norm": 0.5932385325431824, "learning_rate": 8.819028216839831e-06, "loss": 0.3921, "step": 471 }, { "epoch": 0.9059500959692899, "grad_norm": 0.6074923276901245, "learning_rate": 8.811807702916266e-06, "loss": 0.404, "step": 472 }, { "epoch": 0.9078694817658349, "grad_norm": 0.6252521276473999, "learning_rate": 8.804568158067308e-06, "loss": 0.4242, "step": 473 }, { "epoch": 0.9097888675623801, "grad_norm": 0.5919908285140991, "learning_rate": 8.797309618437289e-06, "loss": 0.4102, "step": 474 }, { "epoch": 0.9117082533589251, "grad_norm": 0.5931584239006042, "learning_rate": 8.790032120265373e-06, "loss": 0.415, "step": 475 }, { "epoch": 0.9136276391554703, "grad_norm": 0.6184658408164978, "learning_rate": 8.782735699885378e-06, "loss": 0.3903, "step": 476 }, { "epoch": 0.9155470249520153, "grad_norm": 0.6621074676513672, "learning_rate": 8.775420393725592e-06, "loss": 0.406, "step": 477 }, { "epoch": 0.9174664107485605, "grad_norm": 0.644110381603241, "learning_rate": 8.768086238308591e-06, "loss": 0.4193, "step": 478 }, { "epoch": 0.9193857965451055, "grad_norm": 0.6531520485877991, "learning_rate": 8.760733270251065e-06, "loss": 0.4136, "step": 479 }, { "epoch": 0.9213051823416507, "grad_norm": 0.5932905077934265, "learning_rate": 8.753361526263622e-06, "loss": 0.4072, "step": 480 }, { "epoch": 0.9232245681381958, "grad_norm": 0.5779323577880859, "learning_rate": 8.745971043150614e-06, "loss": 0.402, "step": 481 }, { "epoch": 0.9251439539347409, "grad_norm": 0.5831919312477112, "learning_rate": 8.73856185780995e-06, "loss": 0.4267, "step": 482 }, { "epoch": 0.927063339731286, "grad_norm": 0.6681051254272461, "learning_rate": 8.73113400723291e-06, "loss": 0.4237, "step": 483 }, { "epoch": 0.9289827255278311, "grad_norm": 0.608217179775238, "learning_rate": 8.723687528503966e-06, "loss": 0.4131, "step": 484 }, { "epoch": 0.9309021113243762, "grad_norm": 0.6981887817382812, "learning_rate": 8.716222458800591e-06, "loss": 0.4077, "step": 485 }, { "epoch": 0.9328214971209213, "grad_norm": 0.832606315612793, "learning_rate": 8.708738835393079e-06, "loss": 0.4321, "step": 486 }, { "epoch": 0.9347408829174664, "grad_norm": 0.6175947785377502, "learning_rate": 8.70123669564435e-06, "loss": 0.4072, "step": 487 }, { "epoch": 0.9366602687140115, "grad_norm": 0.8108631372451782, "learning_rate": 8.693716077009776e-06, "loss": 0.4131, "step": 488 }, { "epoch": 0.9385796545105566, "grad_norm": 0.7265631556510925, "learning_rate": 8.686177017036979e-06, "loss": 0.4041, "step": 489 }, { "epoch": 0.9404990403071017, "grad_norm": 0.6613566279411316, "learning_rate": 8.67861955336566e-06, "loss": 0.4165, "step": 490 }, { "epoch": 0.9424184261036468, "grad_norm": 0.7679793834686279, "learning_rate": 8.671043723727396e-06, "loss": 0.4243, "step": 491 }, { "epoch": 0.944337811900192, "grad_norm": 0.645136833190918, "learning_rate": 8.663449565945463e-06, "loss": 0.3952, "step": 492 }, { "epoch": 0.946257197696737, "grad_norm": 0.5956749320030212, "learning_rate": 8.655837117934642e-06, "loss": 0.4066, "step": 493 }, { "epoch": 0.9481765834932822, "grad_norm": 0.7000584006309509, "learning_rate": 8.648206417701028e-06, "loss": 0.4318, "step": 494 }, { "epoch": 0.9500959692898272, "grad_norm": 0.6516624689102173, "learning_rate": 8.640557503341843e-06, "loss": 0.4139, "step": 495 }, { "epoch": 0.9520153550863724, "grad_norm": 0.5945549607276917, "learning_rate": 8.63289041304525e-06, "loss": 0.3956, "step": 496 }, { "epoch": 0.9539347408829175, "grad_norm": 0.6083257794380188, "learning_rate": 8.625205185090147e-06, "loss": 0.428, "step": 497 }, { "epoch": 0.9558541266794626, "grad_norm": 0.5642468929290771, "learning_rate": 8.617501857845998e-06, "loss": 0.3899, "step": 498 }, { "epoch": 0.9577735124760077, "grad_norm": 0.6049384474754333, "learning_rate": 8.609780469772623e-06, "loss": 0.4251, "step": 499 }, { "epoch": 0.9596928982725528, "grad_norm": 0.6118859648704529, "learning_rate": 8.602041059420017e-06, "loss": 0.4025, "step": 500 }, { "epoch": 0.9616122840690979, "grad_norm": 0.5905884504318237, "learning_rate": 8.594283665428147e-06, "loss": 0.4078, "step": 501 }, { "epoch": 0.963531669865643, "grad_norm": 0.5631827712059021, "learning_rate": 8.586508326526776e-06, "loss": 0.4216, "step": 502 }, { "epoch": 0.9654510556621881, "grad_norm": 0.6303861737251282, "learning_rate": 8.57871508153525e-06, "loss": 0.4003, "step": 503 }, { "epoch": 0.9673704414587332, "grad_norm": 0.677788257598877, "learning_rate": 8.570903969362314e-06, "loss": 0.4177, "step": 504 }, { "epoch": 0.9692898272552783, "grad_norm": 0.6351187229156494, "learning_rate": 8.563075029005924e-06, "loss": 0.4362, "step": 505 }, { "epoch": 0.9712092130518234, "grad_norm": 0.6144436001777649, "learning_rate": 8.555228299553043e-06, "loss": 0.4019, "step": 506 }, { "epoch": 0.9731285988483686, "grad_norm": 0.7261773347854614, "learning_rate": 8.547363820179442e-06, "loss": 0.42, "step": 507 }, { "epoch": 0.9750479846449136, "grad_norm": 0.557560920715332, "learning_rate": 8.539481630149516e-06, "loss": 0.4255, "step": 508 }, { "epoch": 0.9769673704414588, "grad_norm": 0.5472039580345154, "learning_rate": 8.531581768816085e-06, "loss": 0.4063, "step": 509 }, { "epoch": 0.9788867562380038, "grad_norm": 0.6655720472335815, "learning_rate": 8.523664275620185e-06, "loss": 0.4174, "step": 510 }, { "epoch": 0.980806142034549, "grad_norm": 0.5904672145843506, "learning_rate": 8.515729190090895e-06, "loss": 0.4156, "step": 511 }, { "epoch": 0.982725527831094, "grad_norm": 0.5816423296928406, "learning_rate": 8.507776551845119e-06, "loss": 0.4271, "step": 512 }, { "epoch": 0.9846449136276392, "grad_norm": 0.6029289960861206, "learning_rate": 8.499806400587391e-06, "loss": 0.4052, "step": 513 }, { "epoch": 0.9865642994241842, "grad_norm": 0.6038168668746948, "learning_rate": 8.491818776109691e-06, "loss": 0.4254, "step": 514 }, { "epoch": 0.9884836852207294, "grad_norm": 0.6169793009757996, "learning_rate": 8.483813718291223e-06, "loss": 0.4181, "step": 515 }, { "epoch": 0.9904030710172744, "grad_norm": 0.686318576335907, "learning_rate": 8.475791267098243e-06, "loss": 0.4125, "step": 516 }, { "epoch": 0.9923224568138196, "grad_norm": 0.5344783067703247, "learning_rate": 8.467751462583837e-06, "loss": 0.4211, "step": 517 }, { "epoch": 0.9942418426103646, "grad_norm": 0.6029199957847595, "learning_rate": 8.459694344887732e-06, "loss": 0.4312, "step": 518 }, { "epoch": 0.9961612284069098, "grad_norm": 0.8031718730926514, "learning_rate": 8.451619954236093e-06, "loss": 0.4184, "step": 519 }, { "epoch": 0.9980806142034548, "grad_norm": 0.6263314485549927, "learning_rate": 8.443528330941322e-06, "loss": 0.424, "step": 520 }, { "epoch": 1.0, "grad_norm": 0.6788792014122009, "learning_rate": 8.435419515401856e-06, "loss": 0.3933, "step": 521 }, { "epoch": 1.0019193857965452, "grad_norm": 0.7001379728317261, "learning_rate": 8.427293548101971e-06, "loss": 0.3687, "step": 522 }, { "epoch": 1.0038387715930903, "grad_norm": 0.557686984539032, "learning_rate": 8.419150469611572e-06, "loss": 0.3585, "step": 523 }, { "epoch": 1.0057581573896353, "grad_norm": 0.7581077218055725, "learning_rate": 8.410990320585993e-06, "loss": 0.377, "step": 524 }, { "epoch": 1.0076775431861804, "grad_norm": 0.6315445899963379, "learning_rate": 8.402813141765796e-06, "loss": 0.3614, "step": 525 }, { "epoch": 1.0095969289827256, "grad_norm": 0.6280166506767273, "learning_rate": 8.394618973976566e-06, "loss": 0.3444, "step": 526 }, { "epoch": 1.0115163147792707, "grad_norm": 0.7165639400482178, "learning_rate": 8.386407858128707e-06, "loss": 0.3744, "step": 527 }, { "epoch": 1.0134357005758157, "grad_norm": 0.6100614070892334, "learning_rate": 8.378179835217239e-06, "loss": 0.3692, "step": 528 }, { "epoch": 1.0153550863723608, "grad_norm": 0.6891491413116455, "learning_rate": 8.369934946321594e-06, "loss": 0.3383, "step": 529 }, { "epoch": 1.017274472168906, "grad_norm": 0.6005285382270813, "learning_rate": 8.361673232605408e-06, "loss": 0.366, "step": 530 }, { "epoch": 1.0191938579654511, "grad_norm": 0.6106036901473999, "learning_rate": 8.353394735316317e-06, "loss": 0.361, "step": 531 }, { "epoch": 1.021113243761996, "grad_norm": 0.5837318897247314, "learning_rate": 8.345099495785753e-06, "loss": 0.3921, "step": 532 }, { "epoch": 1.0230326295585412, "grad_norm": 0.6212928295135498, "learning_rate": 8.336787555428728e-06, "loss": 0.3516, "step": 533 }, { "epoch": 1.0249520153550864, "grad_norm": 0.5940003991127014, "learning_rate": 8.328458955743648e-06, "loss": 0.3829, "step": 534 }, { "epoch": 1.0268714011516316, "grad_norm": 0.5731555819511414, "learning_rate": 8.320113738312081e-06, "loss": 0.3709, "step": 535 }, { "epoch": 1.0287907869481765, "grad_norm": 0.5940685272216797, "learning_rate": 8.311751944798569e-06, "loss": 0.3621, "step": 536 }, { "epoch": 1.0307101727447217, "grad_norm": 0.6581727862358093, "learning_rate": 8.303373616950408e-06, "loss": 0.3568, "step": 537 }, { "epoch": 1.0326295585412668, "grad_norm": 0.6463356018066406, "learning_rate": 8.294978796597444e-06, "loss": 0.363, "step": 538 }, { "epoch": 1.034548944337812, "grad_norm": 0.6287731528282166, "learning_rate": 8.286567525651865e-06, "loss": 0.3614, "step": 539 }, { "epoch": 1.036468330134357, "grad_norm": 0.5947986245155334, "learning_rate": 8.27813984610799e-06, "loss": 0.3669, "step": 540 }, { "epoch": 1.038387715930902, "grad_norm": 0.5899643301963806, "learning_rate": 8.269695800042061e-06, "loss": 0.3768, "step": 541 }, { "epoch": 1.0403071017274472, "grad_norm": 0.621016800403595, "learning_rate": 8.261235429612032e-06, "loss": 0.3463, "step": 542 }, { "epoch": 1.0422264875239924, "grad_norm": 0.5043175220489502, "learning_rate": 8.252758777057355e-06, "loss": 0.3633, "step": 543 }, { "epoch": 1.0441458733205373, "grad_norm": 0.5778144598007202, "learning_rate": 8.244265884698777e-06, "loss": 0.3596, "step": 544 }, { "epoch": 1.0460652591170825, "grad_norm": 0.6812480092048645, "learning_rate": 8.235756794938123e-06, "loss": 0.3599, "step": 545 }, { "epoch": 1.0479846449136276, "grad_norm": 0.5301434397697449, "learning_rate": 8.227231550258084e-06, "loss": 0.3581, "step": 546 }, { "epoch": 1.0499040307101728, "grad_norm": 0.5746753811836243, "learning_rate": 8.218690193222007e-06, "loss": 0.3673, "step": 547 }, { "epoch": 1.051823416506718, "grad_norm": 0.5694441795349121, "learning_rate": 8.210132766473682e-06, "loss": 0.3573, "step": 548 }, { "epoch": 1.053742802303263, "grad_norm": 0.5925020575523376, "learning_rate": 8.201559312737131e-06, "loss": 0.3733, "step": 549 }, { "epoch": 1.055662188099808, "grad_norm": 0.6126152276992798, "learning_rate": 8.19296987481639e-06, "loss": 0.3708, "step": 550 }, { "epoch": 1.0575815738963532, "grad_norm": 0.5762340426445007, "learning_rate": 8.1843644955953e-06, "loss": 0.3679, "step": 551 }, { "epoch": 1.0595009596928984, "grad_norm": 0.5841948986053467, "learning_rate": 8.17574321803729e-06, "loss": 0.3801, "step": 552 }, { "epoch": 1.0614203454894433, "grad_norm": 0.5894670486450195, "learning_rate": 8.167106085185161e-06, "loss": 0.3845, "step": 553 }, { "epoch": 1.0633397312859885, "grad_norm": 0.5701922178268433, "learning_rate": 8.158453140160881e-06, "loss": 0.3665, "step": 554 }, { "epoch": 1.0652591170825336, "grad_norm": 0.577236533164978, "learning_rate": 8.149784426165351e-06, "loss": 0.3704, "step": 555 }, { "epoch": 1.0671785028790788, "grad_norm": 0.5258080959320068, "learning_rate": 8.141099986478212e-06, "loss": 0.3764, "step": 556 }, { "epoch": 1.0690978886756237, "grad_norm": 0.596666693687439, "learning_rate": 8.13239986445761e-06, "loss": 0.3898, "step": 557 }, { "epoch": 1.0710172744721689, "grad_norm": 0.6044830083847046, "learning_rate": 8.12368410353999e-06, "loss": 0.3683, "step": 558 }, { "epoch": 1.072936660268714, "grad_norm": 0.5639522671699524, "learning_rate": 8.114952747239876e-06, "loss": 0.3651, "step": 559 }, { "epoch": 1.0748560460652592, "grad_norm": 0.573088526725769, "learning_rate": 8.106205839149653e-06, "loss": 0.3575, "step": 560 }, { "epoch": 1.0767754318618041, "grad_norm": 0.5921724438667297, "learning_rate": 8.09744342293935e-06, "loss": 0.3627, "step": 561 }, { "epoch": 1.0786948176583493, "grad_norm": 0.5780190229415894, "learning_rate": 8.088665542356421e-06, "loss": 0.383, "step": 562 }, { "epoch": 1.0806142034548945, "grad_norm": 0.5938214659690857, "learning_rate": 8.079872241225534e-06, "loss": 0.3935, "step": 563 }, { "epoch": 1.0825335892514396, "grad_norm": 0.6124862432479858, "learning_rate": 8.071063563448341e-06, "loss": 0.3599, "step": 564 }, { "epoch": 1.0844529750479845, "grad_norm": 0.6408605575561523, "learning_rate": 8.06223955300326e-06, "loss": 0.3625, "step": 565 }, { "epoch": 1.0863723608445297, "grad_norm": 0.5096073746681213, "learning_rate": 8.053400253945267e-06, "loss": 0.3713, "step": 566 }, { "epoch": 1.0882917466410749, "grad_norm": 0.5798072814941406, "learning_rate": 8.044545710405666e-06, "loss": 0.3739, "step": 567 }, { "epoch": 1.09021113243762, "grad_norm": 0.5914900302886963, "learning_rate": 8.035675966591868e-06, "loss": 0.3584, "step": 568 }, { "epoch": 1.092130518234165, "grad_norm": 0.5396993160247803, "learning_rate": 8.026791066787177e-06, "loss": 0.3648, "step": 569 }, { "epoch": 1.0940499040307101, "grad_norm": 0.5560454726219177, "learning_rate": 8.017891055350563e-06, "loss": 0.3852, "step": 570 }, { "epoch": 1.0959692898272553, "grad_norm": 0.6196476221084595, "learning_rate": 8.00897597671644e-06, "loss": 0.3727, "step": 571 }, { "epoch": 1.0978886756238004, "grad_norm": 0.5762874484062195, "learning_rate": 8.000045875394452e-06, "loss": 0.3367, "step": 572 }, { "epoch": 1.0998080614203456, "grad_norm": 0.5936884880065918, "learning_rate": 7.991100795969248e-06, "loss": 0.3712, "step": 573 }, { "epoch": 1.1017274472168905, "grad_norm": 0.5754233598709106, "learning_rate": 7.982140783100248e-06, "loss": 0.3434, "step": 574 }, { "epoch": 1.1036468330134357, "grad_norm": 0.5597128868103027, "learning_rate": 7.973165881521435e-06, "loss": 0.3562, "step": 575 }, { "epoch": 1.1055662188099808, "grad_norm": 0.5983325242996216, "learning_rate": 7.964176136041123e-06, "loss": 0.3895, "step": 576 }, { "epoch": 1.107485604606526, "grad_norm": 0.6327082514762878, "learning_rate": 7.955171591541739e-06, "loss": 0.387, "step": 577 }, { "epoch": 1.109404990403071, "grad_norm": 0.5635960698127747, "learning_rate": 7.946152292979597e-06, "loss": 0.3572, "step": 578 }, { "epoch": 1.111324376199616, "grad_norm": 0.6169804930686951, "learning_rate": 7.937118285384666e-06, "loss": 0.3481, "step": 579 }, { "epoch": 1.1132437619961613, "grad_norm": 0.6488839387893677, "learning_rate": 7.928069613860357e-06, "loss": 0.3755, "step": 580 }, { "epoch": 1.1151631477927064, "grad_norm": 0.5166139602661133, "learning_rate": 7.91900632358329e-06, "loss": 0.3814, "step": 581 }, { "epoch": 1.1170825335892514, "grad_norm": 0.5494997501373291, "learning_rate": 7.909928459803077e-06, "loss": 0.3511, "step": 582 }, { "epoch": 1.1190019193857965, "grad_norm": 0.6587066054344177, "learning_rate": 7.90083606784208e-06, "loss": 0.3604, "step": 583 }, { "epoch": 1.1209213051823417, "grad_norm": 0.6626251339912415, "learning_rate": 7.891729193095201e-06, "loss": 0.3594, "step": 584 }, { "epoch": 1.1228406909788868, "grad_norm": 0.572258472442627, "learning_rate": 7.882607881029652e-06, "loss": 0.3804, "step": 585 }, { "epoch": 1.1247600767754318, "grad_norm": 0.6588909029960632, "learning_rate": 7.873472177184714e-06, "loss": 0.3863, "step": 586 }, { "epoch": 1.126679462571977, "grad_norm": 0.5232745409011841, "learning_rate": 7.864322127171535e-06, "loss": 0.3534, "step": 587 }, { "epoch": 1.128598848368522, "grad_norm": 0.5726982355117798, "learning_rate": 7.855157776672874e-06, "loss": 0.3455, "step": 588 }, { "epoch": 1.1305182341650672, "grad_norm": 0.5649129152297974, "learning_rate": 7.8459791714429e-06, "loss": 0.3817, "step": 589 }, { "epoch": 1.1324376199616122, "grad_norm": 0.55324786901474, "learning_rate": 7.836786357306943e-06, "loss": 0.3694, "step": 590 }, { "epoch": 1.1343570057581573, "grad_norm": 0.5048583149909973, "learning_rate": 7.827579380161272e-06, "loss": 0.35, "step": 591 }, { "epoch": 1.1362763915547025, "grad_norm": 0.5699595808982849, "learning_rate": 7.818358285972871e-06, "loss": 0.3615, "step": 592 }, { "epoch": 1.1381957773512477, "grad_norm": 0.6026802659034729, "learning_rate": 7.809123120779201e-06, "loss": 0.3709, "step": 593 }, { "epoch": 1.1401151631477928, "grad_norm": 0.5223770141601562, "learning_rate": 7.799873930687979e-06, "loss": 0.3689, "step": 594 }, { "epoch": 1.1420345489443378, "grad_norm": 0.7043522000312805, "learning_rate": 7.790610761876936e-06, "loss": 0.3582, "step": 595 }, { "epoch": 1.143953934740883, "grad_norm": 0.5757325291633606, "learning_rate": 7.781333660593599e-06, "loss": 0.3543, "step": 596 }, { "epoch": 1.145873320537428, "grad_norm": 0.5663167238235474, "learning_rate": 7.772042673155057e-06, "loss": 0.3757, "step": 597 }, { "epoch": 1.147792706333973, "grad_norm": 0.6889773607254028, "learning_rate": 7.762737845947719e-06, "loss": 0.3615, "step": 598 }, { "epoch": 1.1497120921305182, "grad_norm": 0.5886605381965637, "learning_rate": 7.753419225427097e-06, "loss": 0.3546, "step": 599 }, { "epoch": 1.1516314779270633, "grad_norm": 0.5525719523429871, "learning_rate": 7.744086858117565e-06, "loss": 0.3743, "step": 600 }, { "epoch": 1.1535508637236085, "grad_norm": 0.6776698231697083, "learning_rate": 7.734740790612137e-06, "loss": 0.3641, "step": 601 }, { "epoch": 1.1554702495201536, "grad_norm": 0.665376603603363, "learning_rate": 7.72538106957221e-06, "loss": 0.3734, "step": 602 }, { "epoch": 1.1573896353166986, "grad_norm": 0.6434155106544495, "learning_rate": 7.716007741727368e-06, "loss": 0.3629, "step": 603 }, { "epoch": 1.1593090211132437, "grad_norm": 0.5811706781387329, "learning_rate": 7.706620853875115e-06, "loss": 0.3566, "step": 604 }, { "epoch": 1.161228406909789, "grad_norm": 0.6345758438110352, "learning_rate": 7.69722045288066e-06, "loss": 0.3477, "step": 605 }, { "epoch": 1.163147792706334, "grad_norm": 0.6229391694068909, "learning_rate": 7.687806585676678e-06, "loss": 0.3762, "step": 606 }, { "epoch": 1.165067178502879, "grad_norm": 0.7289245128631592, "learning_rate": 7.678379299263076e-06, "loss": 0.3439, "step": 607 }, { "epoch": 1.1669865642994242, "grad_norm": 0.7816213965415955, "learning_rate": 7.668938640706756e-06, "loss": 0.3755, "step": 608 }, { "epoch": 1.1689059500959693, "grad_norm": 0.6519376039505005, "learning_rate": 7.659484657141382e-06, "loss": 0.3605, "step": 609 }, { "epoch": 1.1708253358925145, "grad_norm": 0.6300104856491089, "learning_rate": 7.650017395767149e-06, "loss": 0.3508, "step": 610 }, { "epoch": 1.1727447216890594, "grad_norm": 0.725034236907959, "learning_rate": 7.64053690385054e-06, "loss": 0.3819, "step": 611 }, { "epoch": 1.1746641074856046, "grad_norm": 0.5700224041938782, "learning_rate": 7.631043228724091e-06, "loss": 0.3507, "step": 612 }, { "epoch": 1.1765834932821497, "grad_norm": 0.5461344122886658, "learning_rate": 7.621536417786159e-06, "loss": 0.3659, "step": 613 }, { "epoch": 1.1785028790786949, "grad_norm": 0.6267542839050293, "learning_rate": 7.612016518500686e-06, "loss": 0.3575, "step": 614 }, { "epoch": 1.18042226487524, "grad_norm": 0.5585712194442749, "learning_rate": 7.602483578396955e-06, "loss": 0.374, "step": 615 }, { "epoch": 1.182341650671785, "grad_norm": 0.5674108862876892, "learning_rate": 7.59293764506936e-06, "loss": 0.3829, "step": 616 }, { "epoch": 1.1842610364683301, "grad_norm": 0.5605682730674744, "learning_rate": 7.583378766177163e-06, "loss": 0.3698, "step": 617 }, { "epoch": 1.1861804222648753, "grad_norm": 0.5978506207466125, "learning_rate": 7.573806989444257e-06, "loss": 0.3742, "step": 618 }, { "epoch": 1.1880998080614202, "grad_norm": 0.6251167058944702, "learning_rate": 7.564222362658935e-06, "loss": 0.371, "step": 619 }, { "epoch": 1.1900191938579654, "grad_norm": 0.5181376338005066, "learning_rate": 7.554624933673638e-06, "loss": 0.3735, "step": 620 }, { "epoch": 1.1919385796545106, "grad_norm": 0.5934174060821533, "learning_rate": 7.54501475040473e-06, "loss": 0.3575, "step": 621 }, { "epoch": 1.1938579654510557, "grad_norm": 0.6636096239089966, "learning_rate": 7.5353918608322476e-06, "loss": 0.3817, "step": 622 }, { "epoch": 1.1957773512476009, "grad_norm": 0.5940460562705994, "learning_rate": 7.52575631299967e-06, "loss": 0.3834, "step": 623 }, { "epoch": 1.1976967370441458, "grad_norm": 0.5859001278877258, "learning_rate": 7.516108155013667e-06, "loss": 0.3837, "step": 624 }, { "epoch": 1.199616122840691, "grad_norm": 0.6759419441223145, "learning_rate": 7.5064474350438755e-06, "loss": 0.37, "step": 625 }, { "epoch": 1.2015355086372361, "grad_norm": 0.5810613036155701, "learning_rate": 7.4967742013226415e-06, "loss": 0.3741, "step": 626 }, { "epoch": 1.2034548944337813, "grad_norm": 0.5574901103973389, "learning_rate": 7.487088502144793e-06, "loss": 0.3601, "step": 627 }, { "epoch": 1.2053742802303262, "grad_norm": 0.5251055955886841, "learning_rate": 7.477390385867391e-06, "loss": 0.36, "step": 628 }, { "epoch": 1.2072936660268714, "grad_norm": 0.6228454113006592, "learning_rate": 7.467679900909489e-06, "loss": 0.3749, "step": 629 }, { "epoch": 1.2092130518234165, "grad_norm": 0.5849305987358093, "learning_rate": 7.457957095751896e-06, "loss": 0.356, "step": 630 }, { "epoch": 1.2111324376199617, "grad_norm": 0.57450270652771, "learning_rate": 7.4482220189369295e-06, "loss": 0.3731, "step": 631 }, { "epoch": 1.2130518234165066, "grad_norm": 0.5687177181243896, "learning_rate": 7.438474719068174e-06, "loss": 0.361, "step": 632 }, { "epoch": 1.2149712092130518, "grad_norm": 0.6256352066993713, "learning_rate": 7.428715244810238e-06, "loss": 0.3672, "step": 633 }, { "epoch": 1.216890595009597, "grad_norm": 0.5820345878601074, "learning_rate": 7.418943644888518e-06, "loss": 0.3696, "step": 634 }, { "epoch": 1.2188099808061421, "grad_norm": 0.5326980948448181, "learning_rate": 7.4091599680889425e-06, "loss": 0.3858, "step": 635 }, { "epoch": 1.220729366602687, "grad_norm": 0.529526948928833, "learning_rate": 7.399364263257739e-06, "loss": 0.3516, "step": 636 }, { "epoch": 1.2226487523992322, "grad_norm": 0.6137664914131165, "learning_rate": 7.389556579301186e-06, "loss": 0.3632, "step": 637 }, { "epoch": 1.2245681381957774, "grad_norm": 0.5726659297943115, "learning_rate": 7.379736965185369e-06, "loss": 0.3731, "step": 638 }, { "epoch": 1.2264875239923225, "grad_norm": 0.513790488243103, "learning_rate": 7.369905469935935e-06, "loss": 0.3598, "step": 639 }, { "epoch": 1.2284069097888675, "grad_norm": 0.5283376574516296, "learning_rate": 7.3600621426378515e-06, "loss": 0.3626, "step": 640 }, { "epoch": 1.2303262955854126, "grad_norm": 0.5405957698822021, "learning_rate": 7.350207032435157e-06, "loss": 0.3723, "step": 641 }, { "epoch": 1.2322456813819578, "grad_norm": 0.609307587146759, "learning_rate": 7.340340188530719e-06, "loss": 0.3601, "step": 642 }, { "epoch": 1.234165067178503, "grad_norm": 0.5811563730239868, "learning_rate": 7.330461660185987e-06, "loss": 0.3593, "step": 643 }, { "epoch": 1.236084452975048, "grad_norm": 0.5257121324539185, "learning_rate": 7.320571496720743e-06, "loss": 0.3683, "step": 644 }, { "epoch": 1.238003838771593, "grad_norm": 0.5702775716781616, "learning_rate": 7.3106697475128655e-06, "loss": 0.386, "step": 645 }, { "epoch": 1.2399232245681382, "grad_norm": 0.683293879032135, "learning_rate": 7.300756461998071e-06, "loss": 0.3502, "step": 646 }, { "epoch": 1.2418426103646834, "grad_norm": 0.5562958717346191, "learning_rate": 7.2908316896696725e-06, "loss": 0.37, "step": 647 }, { "epoch": 1.2437619961612283, "grad_norm": 0.6258202195167542, "learning_rate": 7.280895480078335e-06, "loss": 0.3838, "step": 648 }, { "epoch": 1.2456813819577734, "grad_norm": 0.5628198385238647, "learning_rate": 7.270947882831823e-06, "loss": 0.3674, "step": 649 }, { "epoch": 1.2476007677543186, "grad_norm": 0.718628466129303, "learning_rate": 7.260988947594759e-06, "loss": 0.3793, "step": 650 }, { "epoch": 1.2495201535508638, "grad_norm": 0.5685130953788757, "learning_rate": 7.251018724088367e-06, "loss": 0.3788, "step": 651 }, { "epoch": 1.251439539347409, "grad_norm": 0.5694072842597961, "learning_rate": 7.241037262090232e-06, "loss": 0.3607, "step": 652 }, { "epoch": 1.2533589251439539, "grad_norm": 0.6224561929702759, "learning_rate": 7.231044611434049e-06, "loss": 0.3795, "step": 653 }, { "epoch": 1.255278310940499, "grad_norm": 0.6552006602287292, "learning_rate": 7.221040822009372e-06, "loss": 0.3547, "step": 654 }, { "epoch": 1.2571976967370442, "grad_norm": 0.5914192199707031, "learning_rate": 7.211025943761367e-06, "loss": 0.3681, "step": 655 }, { "epoch": 1.2591170825335891, "grad_norm": 0.619067370891571, "learning_rate": 7.201000026690562e-06, "loss": 0.387, "step": 656 }, { "epoch": 1.2610364683301343, "grad_norm": 0.5412355661392212, "learning_rate": 7.190963120852601e-06, "loss": 0.3552, "step": 657 }, { "epoch": 1.2629558541266794, "grad_norm": 0.5563709735870361, "learning_rate": 7.180915276357987e-06, "loss": 0.3622, "step": 658 }, { "epoch": 1.2648752399232246, "grad_norm": 0.6318453550338745, "learning_rate": 7.1708565433718354e-06, "loss": 0.3803, "step": 659 }, { "epoch": 1.2667946257197698, "grad_norm": 0.47105222940444946, "learning_rate": 7.160786972113627e-06, "loss": 0.3909, "step": 660 }, { "epoch": 1.2687140115163147, "grad_norm": 0.5417951941490173, "learning_rate": 7.150706612856952e-06, "loss": 0.3627, "step": 661 }, { "epoch": 1.2706333973128598, "grad_norm": 0.4920963943004608, "learning_rate": 7.140615515929262e-06, "loss": 0.3673, "step": 662 }, { "epoch": 1.272552783109405, "grad_norm": 0.5082047581672668, "learning_rate": 7.130513731711616e-06, "loss": 0.366, "step": 663 }, { "epoch": 1.2744721689059502, "grad_norm": 0.5472375750541687, "learning_rate": 7.120401310638432e-06, "loss": 0.3734, "step": 664 }, { "epoch": 1.2763915547024953, "grad_norm": 0.6218360662460327, "learning_rate": 7.1102783031972326e-06, "loss": 0.3607, "step": 665 }, { "epoch": 1.2783109404990403, "grad_norm": 0.5033209323883057, "learning_rate": 7.100144759928396e-06, "loss": 0.3562, "step": 666 }, { "epoch": 1.2802303262955854, "grad_norm": 0.5684214234352112, "learning_rate": 7.0900007314249e-06, "loss": 0.3732, "step": 667 }, { "epoch": 1.2821497120921306, "grad_norm": 0.5835021138191223, "learning_rate": 7.079846268332073e-06, "loss": 0.3797, "step": 668 }, { "epoch": 1.2840690978886755, "grad_norm": 0.5322245359420776, "learning_rate": 7.06968142134734e-06, "loss": 0.3731, "step": 669 }, { "epoch": 1.2859884836852207, "grad_norm": 0.5383755564689636, "learning_rate": 7.059506241219964e-06, "loss": 0.3609, "step": 670 }, { "epoch": 1.2879078694817658, "grad_norm": 0.5906662940979004, "learning_rate": 7.0493207787508034e-06, "loss": 0.3671, "step": 671 }, { "epoch": 1.289827255278311, "grad_norm": 0.5733925700187683, "learning_rate": 7.039125084792049e-06, "loss": 0.3604, "step": 672 }, { "epoch": 1.2917466410748562, "grad_norm": 0.5559172630310059, "learning_rate": 7.028919210246975e-06, "loss": 0.3621, "step": 673 }, { "epoch": 1.293666026871401, "grad_norm": 0.5421929359436035, "learning_rate": 7.018703206069684e-06, "loss": 0.3284, "step": 674 }, { "epoch": 1.2955854126679462, "grad_norm": 0.5646209120750427, "learning_rate": 7.008477123264849e-06, "loss": 0.3716, "step": 675 }, { "epoch": 1.2975047984644914, "grad_norm": 0.6280581951141357, "learning_rate": 6.998241012887463e-06, "loss": 0.3623, "step": 676 }, { "epoch": 1.2994241842610363, "grad_norm": 0.5377994775772095, "learning_rate": 6.987994926042588e-06, "loss": 0.3568, "step": 677 }, { "epoch": 1.3013435700575815, "grad_norm": 0.5917571187019348, "learning_rate": 6.977738913885087e-06, "loss": 0.3671, "step": 678 }, { "epoch": 1.3032629558541267, "grad_norm": 0.600432813167572, "learning_rate": 6.967473027619381e-06, "loss": 0.3747, "step": 679 }, { "epoch": 1.3051823416506718, "grad_norm": 0.5064613819122314, "learning_rate": 6.957197318499187e-06, "loss": 0.355, "step": 680 }, { "epoch": 1.307101727447217, "grad_norm": 0.5571784973144531, "learning_rate": 6.946911837827267e-06, "loss": 0.3624, "step": 681 }, { "epoch": 1.309021113243762, "grad_norm": 0.6019419431686401, "learning_rate": 6.936616636955164e-06, "loss": 0.3971, "step": 682 }, { "epoch": 1.310940499040307, "grad_norm": 0.5896368622779846, "learning_rate": 6.926311767282951e-06, "loss": 0.3779, "step": 683 }, { "epoch": 1.3128598848368522, "grad_norm": 0.5325536131858826, "learning_rate": 6.915997280258977e-06, "loss": 0.3862, "step": 684 }, { "epoch": 1.3147792706333974, "grad_norm": 0.5857822895050049, "learning_rate": 6.905673227379606e-06, "loss": 0.3894, "step": 685 }, { "epoch": 1.3166986564299425, "grad_norm": 0.5881710648536682, "learning_rate": 6.895339660188958e-06, "loss": 0.3827, "step": 686 }, { "epoch": 1.3186180422264875, "grad_norm": 0.5315939784049988, "learning_rate": 6.884996630278654e-06, "loss": 0.3706, "step": 687 }, { "epoch": 1.3205374280230326, "grad_norm": 0.5695570111274719, "learning_rate": 6.874644189287566e-06, "loss": 0.3773, "step": 688 }, { "epoch": 1.3224568138195778, "grad_norm": 0.5689035058021545, "learning_rate": 6.864282388901544e-06, "loss": 0.3458, "step": 689 }, { "epoch": 1.3243761996161227, "grad_norm": 0.6650693416595459, "learning_rate": 6.853911280853168e-06, "loss": 0.3886, "step": 690 }, { "epoch": 1.326295585412668, "grad_norm": 0.5835446715354919, "learning_rate": 6.84353091692149e-06, "loss": 0.3815, "step": 691 }, { "epoch": 1.328214971209213, "grad_norm": 0.5177924036979675, "learning_rate": 6.833141348931773e-06, "loss": 0.3731, "step": 692 }, { "epoch": 1.3301343570057582, "grad_norm": 0.5422524213790894, "learning_rate": 6.822742628755228e-06, "loss": 0.371, "step": 693 }, { "epoch": 1.3320537428023034, "grad_norm": 0.6401960849761963, "learning_rate": 6.812334808308762e-06, "loss": 0.4015, "step": 694 }, { "epoch": 1.3339731285988483, "grad_norm": 0.5808922648429871, "learning_rate": 6.801917939554721e-06, "loss": 0.3754, "step": 695 }, { "epoch": 1.3358925143953935, "grad_norm": 0.5419764518737793, "learning_rate": 6.791492074500618e-06, "loss": 0.3535, "step": 696 }, { "epoch": 1.3378119001919386, "grad_norm": 0.6075758337974548, "learning_rate": 6.781057265198885e-06, "loss": 0.3788, "step": 697 }, { "epoch": 1.3397312859884836, "grad_norm": 0.4835999011993408, "learning_rate": 6.770613563746609e-06, "loss": 0.3805, "step": 698 }, { "epoch": 1.3416506717850287, "grad_norm": 0.5276488661766052, "learning_rate": 6.760161022285274e-06, "loss": 0.3863, "step": 699 }, { "epoch": 1.3435700575815739, "grad_norm": 0.5761359930038452, "learning_rate": 6.749699693000495e-06, "loss": 0.3772, "step": 700 }, { "epoch": 1.345489443378119, "grad_norm": 0.505481481552124, "learning_rate": 6.739229628121765e-06, "loss": 0.3758, "step": 701 }, { "epoch": 1.3474088291746642, "grad_norm": 0.5998331308364868, "learning_rate": 6.728750879922187e-06, "loss": 0.3521, "step": 702 }, { "epoch": 1.3493282149712091, "grad_norm": 0.5504065752029419, "learning_rate": 6.7182635007182186e-06, "loss": 0.3724, "step": 703 }, { "epoch": 1.3512476007677543, "grad_norm": 0.5401632785797119, "learning_rate": 6.70776754286941e-06, "loss": 0.3859, "step": 704 }, { "epoch": 1.3531669865642995, "grad_norm": 0.5649744272232056, "learning_rate": 6.6972630587781385e-06, "loss": 0.3882, "step": 705 }, { "epoch": 1.3550863723608444, "grad_norm": 0.551974356174469, "learning_rate": 6.686750100889351e-06, "loss": 0.3698, "step": 706 }, { "epoch": 1.3570057581573896, "grad_norm": 0.5754601955413818, "learning_rate": 6.676228721690301e-06, "loss": 0.3819, "step": 707 }, { "epoch": 1.3589251439539347, "grad_norm": 0.5498519539833069, "learning_rate": 6.665698973710289e-06, "loss": 0.3565, "step": 708 }, { "epoch": 1.3608445297504799, "grad_norm": 0.5401290059089661, "learning_rate": 6.655160909520391e-06, "loss": 0.362, "step": 709 }, { "epoch": 1.362763915547025, "grad_norm": 0.5850600004196167, "learning_rate": 6.6446145817332105e-06, "loss": 0.3632, "step": 710 }, { "epoch": 1.36468330134357, "grad_norm": 0.5589048862457275, "learning_rate": 6.634060043002603e-06, "loss": 0.355, "step": 711 }, { "epoch": 1.3666026871401151, "grad_norm": 0.546894371509552, "learning_rate": 6.6234973460234184e-06, "loss": 0.3618, "step": 712 }, { "epoch": 1.3685220729366603, "grad_norm": 0.6020057201385498, "learning_rate": 6.6129265435312405e-06, "loss": 0.368, "step": 713 }, { "epoch": 1.3704414587332054, "grad_norm": 0.6082607507705688, "learning_rate": 6.602347688302118e-06, "loss": 0.3603, "step": 714 }, { "epoch": 1.3723608445297506, "grad_norm": 0.5556630492210388, "learning_rate": 6.591760833152306e-06, "loss": 0.3764, "step": 715 }, { "epoch": 1.3742802303262955, "grad_norm": 0.5553204417228699, "learning_rate": 6.581166030937998e-06, "loss": 0.3424, "step": 716 }, { "epoch": 1.3761996161228407, "grad_norm": 0.516120970249176, "learning_rate": 6.570563334555068e-06, "loss": 0.3887, "step": 717 }, { "epoch": 1.3781190019193859, "grad_norm": 0.5340508222579956, "learning_rate": 6.5599527969387964e-06, "loss": 0.371, "step": 718 }, { "epoch": 1.3800383877159308, "grad_norm": 0.6220966577529907, "learning_rate": 6.54933447106362e-06, "loss": 0.3779, "step": 719 }, { "epoch": 1.381957773512476, "grad_norm": 0.5505868792533875, "learning_rate": 6.538708409942854e-06, "loss": 0.3738, "step": 720 }, { "epoch": 1.383877159309021, "grad_norm": 0.5337285399436951, "learning_rate": 6.52807466662843e-06, "loss": 0.364, "step": 721 }, { "epoch": 1.3857965451055663, "grad_norm": 0.6182253956794739, "learning_rate": 6.517433294210642e-06, "loss": 0.3481, "step": 722 }, { "epoch": 1.3877159309021114, "grad_norm": 0.5496888756752014, "learning_rate": 6.506784345817867e-06, "loss": 0.365, "step": 723 }, { "epoch": 1.3896353166986564, "grad_norm": 0.6307915449142456, "learning_rate": 6.49612787461631e-06, "loss": 0.3627, "step": 724 }, { "epoch": 1.3915547024952015, "grad_norm": 0.615845799446106, "learning_rate": 6.48546393380973e-06, "loss": 0.4019, "step": 725 }, { "epoch": 1.3934740882917467, "grad_norm": 0.561164379119873, "learning_rate": 6.474792576639184e-06, "loss": 0.3729, "step": 726 }, { "epoch": 1.3953934740882916, "grad_norm": 0.5522388219833374, "learning_rate": 6.464113856382752e-06, "loss": 0.3665, "step": 727 }, { "epoch": 1.3973128598848368, "grad_norm": 0.6079753041267395, "learning_rate": 6.4534278263552785e-06, "loss": 0.3823, "step": 728 }, { "epoch": 1.399232245681382, "grad_norm": 0.5673210024833679, "learning_rate": 6.4427345399081e-06, "loss": 0.3431, "step": 729 }, { "epoch": 1.401151631477927, "grad_norm": 0.5114516615867615, "learning_rate": 6.4320340504287825e-06, "loss": 0.3435, "step": 730 }, { "epoch": 1.4030710172744723, "grad_norm": 0.6319993734359741, "learning_rate": 6.421326411340855e-06, "loss": 0.3968, "step": 731 }, { "epoch": 1.4049904030710172, "grad_norm": 0.5365750789642334, "learning_rate": 6.410611676103542e-06, "loss": 0.3559, "step": 732 }, { "epoch": 1.4069097888675623, "grad_norm": 0.5675460696220398, "learning_rate": 6.399889898211495e-06, "loss": 0.3676, "step": 733 }, { "epoch": 1.4088291746641075, "grad_norm": 0.5163068175315857, "learning_rate": 6.389161131194525e-06, "loss": 0.3553, "step": 734 }, { "epoch": 1.4107485604606527, "grad_norm": 0.4818779230117798, "learning_rate": 6.378425428617343e-06, "loss": 0.3853, "step": 735 }, { "epoch": 1.4126679462571978, "grad_norm": 0.5703138113021851, "learning_rate": 6.3676828440792815e-06, "loss": 0.3579, "step": 736 }, { "epoch": 1.4145873320537428, "grad_norm": 0.5984382629394531, "learning_rate": 6.356933431214034e-06, "loss": 0.3614, "step": 737 }, { "epoch": 1.416506717850288, "grad_norm": 0.5235782265663147, "learning_rate": 6.346177243689384e-06, "loss": 0.3372, "step": 738 }, { "epoch": 1.418426103646833, "grad_norm": 0.5993474721908569, "learning_rate": 6.3354143352069415e-06, "loss": 0.3538, "step": 739 }, { "epoch": 1.420345489443378, "grad_norm": 0.5441020131111145, "learning_rate": 6.324644759501869e-06, "loss": 0.3579, "step": 740 }, { "epoch": 1.4222648752399232, "grad_norm": 0.5287744998931885, "learning_rate": 6.313868570342614e-06, "loss": 0.3483, "step": 741 }, { "epoch": 1.4241842610364683, "grad_norm": 0.5689530968666077, "learning_rate": 6.303085821530647e-06, "loss": 0.3735, "step": 742 }, { "epoch": 1.4261036468330135, "grad_norm": 0.5570158362388611, "learning_rate": 6.292296566900187e-06, "loss": 0.3679, "step": 743 }, { "epoch": 1.4280230326295587, "grad_norm": 0.6042669415473938, "learning_rate": 6.281500860317931e-06, "loss": 0.3692, "step": 744 }, { "epoch": 1.4299424184261036, "grad_norm": 0.5010374784469604, "learning_rate": 6.270698755682792e-06, "loss": 0.3762, "step": 745 }, { "epoch": 1.4318618042226487, "grad_norm": 0.525863528251648, "learning_rate": 6.259890306925627e-06, "loss": 0.3658, "step": 746 }, { "epoch": 1.433781190019194, "grad_norm": 0.5522574186325073, "learning_rate": 6.249075568008961e-06, "loss": 0.3484, "step": 747 }, { "epoch": 1.4357005758157388, "grad_norm": 0.5156435370445251, "learning_rate": 6.238254592926728e-06, "loss": 0.3549, "step": 748 }, { "epoch": 1.437619961612284, "grad_norm": 0.5174874663352966, "learning_rate": 6.227427435703997e-06, "loss": 0.352, "step": 749 }, { "epoch": 1.4395393474088292, "grad_norm": 0.539335310459137, "learning_rate": 6.2165941503966995e-06, "loss": 0.3638, "step": 750 }, { "epoch": 1.4414587332053743, "grad_norm": 0.49308961629867554, "learning_rate": 6.205754791091364e-06, "loss": 0.3458, "step": 751 }, { "epoch": 1.4433781190019195, "grad_norm": 0.5567535758018494, "learning_rate": 6.194909411904842e-06, "loss": 0.3526, "step": 752 }, { "epoch": 1.4452975047984644, "grad_norm": 0.49278905987739563, "learning_rate": 6.1840580669840455e-06, "loss": 0.3579, "step": 753 }, { "epoch": 1.4472168905950096, "grad_norm": 0.51790851354599, "learning_rate": 6.173200810505667e-06, "loss": 0.3602, "step": 754 }, { "epoch": 1.4491362763915547, "grad_norm": 0.5248944163322449, "learning_rate": 6.162337696675909e-06, "loss": 0.3796, "step": 755 }, { "epoch": 1.4510556621880997, "grad_norm": 0.4750197231769562, "learning_rate": 6.151468779730226e-06, "loss": 0.366, "step": 756 }, { "epoch": 1.452975047984645, "grad_norm": 0.5236292481422424, "learning_rate": 6.140594113933043e-06, "loss": 0.3721, "step": 757 }, { "epoch": 1.45489443378119, "grad_norm": 0.4761126637458801, "learning_rate": 6.129713753577482e-06, "loss": 0.3557, "step": 758 }, { "epoch": 1.4568138195777351, "grad_norm": 0.518679678440094, "learning_rate": 6.1188277529851015e-06, "loss": 0.3702, "step": 759 }, { "epoch": 1.4587332053742803, "grad_norm": 0.4993003010749817, "learning_rate": 6.107936166505615e-06, "loss": 0.3533, "step": 760 }, { "epoch": 1.4606525911708252, "grad_norm": 0.5242207050323486, "learning_rate": 6.097039048516628e-06, "loss": 0.3867, "step": 761 }, { "epoch": 1.4625719769673704, "grad_norm": 0.4760700762271881, "learning_rate": 6.0861364534233615e-06, "loss": 0.3716, "step": 762 }, { "epoch": 1.4644913627639156, "grad_norm": 0.4641895890235901, "learning_rate": 6.075228435658379e-06, "loss": 0.3536, "step": 763 }, { "epoch": 1.4664107485604607, "grad_norm": 0.48264387249946594, "learning_rate": 6.064315049681323e-06, "loss": 0.3652, "step": 764 }, { "epoch": 1.4683301343570059, "grad_norm": 0.5038925409317017, "learning_rate": 6.053396349978632e-06, "loss": 0.3756, "step": 765 }, { "epoch": 1.4702495201535508, "grad_norm": 0.5263834595680237, "learning_rate": 6.042472391063277e-06, "loss": 0.3434, "step": 766 }, { "epoch": 1.472168905950096, "grad_norm": 0.6097690463066101, "learning_rate": 6.031543227474486e-06, "loss": 0.3528, "step": 767 }, { "epoch": 1.4740882917466411, "grad_norm": 0.5428218841552734, "learning_rate": 6.0206089137774696e-06, "loss": 0.3639, "step": 768 }, { "epoch": 1.476007677543186, "grad_norm": 0.4892042279243469, "learning_rate": 6.009669504563154e-06, "loss": 0.4019, "step": 769 }, { "epoch": 1.4779270633397312, "grad_norm": 0.5798658132553101, "learning_rate": 5.998725054447904e-06, "loss": 0.3636, "step": 770 }, { "epoch": 1.4798464491362764, "grad_norm": 0.5247802138328552, "learning_rate": 5.9877756180732505e-06, "loss": 0.362, "step": 771 }, { "epoch": 1.4817658349328215, "grad_norm": 0.5077059864997864, "learning_rate": 5.976821250105622e-06, "loss": 0.3635, "step": 772 }, { "epoch": 1.4836852207293667, "grad_norm": 0.4664514362812042, "learning_rate": 5.965862005236067e-06, "loss": 0.3879, "step": 773 }, { "epoch": 1.4856046065259116, "grad_norm": 0.567458987236023, "learning_rate": 5.954897938179982e-06, "loss": 0.3361, "step": 774 }, { "epoch": 1.4875239923224568, "grad_norm": 0.517892599105835, "learning_rate": 5.943929103676839e-06, "loss": 0.367, "step": 775 }, { "epoch": 1.489443378119002, "grad_norm": 0.50938481092453, "learning_rate": 5.932955556489912e-06, "loss": 0.3865, "step": 776 }, { "epoch": 1.491362763915547, "grad_norm": 0.4991002082824707, "learning_rate": 5.921977351406004e-06, "loss": 0.3541, "step": 777 }, { "epoch": 1.493282149712092, "grad_norm": 0.4735221564769745, "learning_rate": 5.9109945432351745e-06, "loss": 0.3424, "step": 778 }, { "epoch": 1.4952015355086372, "grad_norm": 0.5408190488815308, "learning_rate": 5.900007186810461e-06, "loss": 0.3894, "step": 779 }, { "epoch": 1.4971209213051824, "grad_norm": 0.517458975315094, "learning_rate": 5.889015336987614e-06, "loss": 0.3832, "step": 780 }, { "epoch": 1.4990403071017275, "grad_norm": 0.5397912263870239, "learning_rate": 5.878019048644812e-06, "loss": 0.3768, "step": 781 }, { "epoch": 1.5009596928982725, "grad_norm": 0.5046637654304504, "learning_rate": 5.8670183766823965e-06, "loss": 0.3871, "step": 782 }, { "epoch": 1.5028790786948176, "grad_norm": 0.5143306851387024, "learning_rate": 5.856013376022594e-06, "loss": 0.3729, "step": 783 }, { "epoch": 1.5047984644913628, "grad_norm": 0.5319773554801941, "learning_rate": 5.8450041016092465e-06, "loss": 0.3457, "step": 784 }, { "epoch": 1.5067178502879077, "grad_norm": 0.5275461077690125, "learning_rate": 5.833990608407525e-06, "loss": 0.3479, "step": 785 }, { "epoch": 1.508637236084453, "grad_norm": 0.491390198469162, "learning_rate": 5.82297295140367e-06, "loss": 0.3507, "step": 786 }, { "epoch": 1.510556621880998, "grad_norm": 0.5845358371734619, "learning_rate": 5.811951185604709e-06, "loss": 0.3494, "step": 787 }, { "epoch": 1.5124760076775432, "grad_norm": 0.5382588505744934, "learning_rate": 5.8009253660381806e-06, "loss": 0.3751, "step": 788 }, { "epoch": 1.5143953934740884, "grad_norm": 0.5044906735420227, "learning_rate": 5.789895547751867e-06, "loss": 0.3496, "step": 789 }, { "epoch": 1.5163147792706333, "grad_norm": 0.5060889720916748, "learning_rate": 5.778861785813508e-06, "loss": 0.3704, "step": 790 }, { "epoch": 1.5182341650671785, "grad_norm": 0.5663946270942688, "learning_rate": 5.767824135310538e-06, "loss": 0.3432, "step": 791 }, { "epoch": 1.5201535508637236, "grad_norm": 0.5065401792526245, "learning_rate": 5.756782651349804e-06, "loss": 0.3574, "step": 792 }, { "epoch": 1.5220729366602685, "grad_norm": 0.48517510294914246, "learning_rate": 5.745737389057294e-06, "loss": 0.3746, "step": 793 }, { "epoch": 1.523992322456814, "grad_norm": 0.5510480999946594, "learning_rate": 5.734688403577854e-06, "loss": 0.3783, "step": 794 }, { "epoch": 1.5259117082533589, "grad_norm": 0.6211479902267456, "learning_rate": 5.723635750074924e-06, "loss": 0.3833, "step": 795 }, { "epoch": 1.527831094049904, "grad_norm": 0.4983424246311188, "learning_rate": 5.7125794837302554e-06, "loss": 0.3498, "step": 796 }, { "epoch": 1.5297504798464492, "grad_norm": 0.5450094938278198, "learning_rate": 5.701519659743636e-06, "loss": 0.3539, "step": 797 }, { "epoch": 1.5316698656429941, "grad_norm": 0.619610071182251, "learning_rate": 5.690456333332617e-06, "loss": 0.3601, "step": 798 }, { "epoch": 1.5335892514395395, "grad_norm": 0.585547685623169, "learning_rate": 5.679389559732234e-06, "loss": 0.376, "step": 799 }, { "epoch": 1.5355086372360844, "grad_norm": 0.4904400408267975, "learning_rate": 5.6683193941947365e-06, "loss": 0.3615, "step": 800 }, { "epoch": 1.5374280230326296, "grad_norm": 0.5027180910110474, "learning_rate": 5.657245891989307e-06, "loss": 0.3587, "step": 801 }, { "epoch": 1.5393474088291748, "grad_norm": 0.5675384402275085, "learning_rate": 5.646169108401785e-06, "loss": 0.338, "step": 802 }, { "epoch": 1.5412667946257197, "grad_norm": 0.49412500858306885, "learning_rate": 5.635089098734394e-06, "loss": 0.3812, "step": 803 }, { "epoch": 1.5431861804222649, "grad_norm": 0.5398568511009216, "learning_rate": 5.624005918305466e-06, "loss": 0.3481, "step": 804 }, { "epoch": 1.54510556621881, "grad_norm": 0.5652787089347839, "learning_rate": 5.61291962244916e-06, "loss": 0.3719, "step": 805 }, { "epoch": 1.547024952015355, "grad_norm": 0.5018628239631653, "learning_rate": 5.601830266515191e-06, "loss": 0.3581, "step": 806 }, { "epoch": 1.5489443378119003, "grad_norm": 0.5567100644111633, "learning_rate": 5.59073790586855e-06, "loss": 0.3722, "step": 807 }, { "epoch": 1.5508637236084453, "grad_norm": 0.5201087594032288, "learning_rate": 5.579642595889237e-06, "loss": 0.3677, "step": 808 }, { "epoch": 1.5527831094049904, "grad_norm": 0.5062837600708008, "learning_rate": 5.568544391971964e-06, "loss": 0.3602, "step": 809 }, { "epoch": 1.5547024952015356, "grad_norm": 0.5320248603820801, "learning_rate": 5.5574433495259015e-06, "loss": 0.3721, "step": 810 }, { "epoch": 1.5566218809980805, "grad_norm": 0.5407199263572693, "learning_rate": 5.546339523974389e-06, "loss": 0.3635, "step": 811 }, { "epoch": 1.5585412667946257, "grad_norm": 0.4978143870830536, "learning_rate": 5.5352329707546605e-06, "loss": 0.3701, "step": 812 }, { "epoch": 1.5604606525911708, "grad_norm": 0.4879395067691803, "learning_rate": 5.5241237453175664e-06, "loss": 0.3623, "step": 813 }, { "epoch": 1.5623800383877158, "grad_norm": 0.4949481785297394, "learning_rate": 5.513011903127301e-06, "loss": 0.3518, "step": 814 }, { "epoch": 1.5642994241842612, "grad_norm": 0.46726474165916443, "learning_rate": 5.501897499661123e-06, "loss": 0.3547, "step": 815 }, { "epoch": 1.566218809980806, "grad_norm": 0.5235663056373596, "learning_rate": 5.49078059040908e-06, "loss": 0.3685, "step": 816 }, { "epoch": 1.5681381957773513, "grad_norm": 0.490326851606369, "learning_rate": 5.4796612308737225e-06, "loss": 0.374, "step": 817 }, { "epoch": 1.5700575815738964, "grad_norm": 0.5144467353820801, "learning_rate": 5.4685394765698455e-06, "loss": 0.3918, "step": 818 }, { "epoch": 1.5719769673704413, "grad_norm": 0.5261038541793823, "learning_rate": 5.4574153830241905e-06, "loss": 0.3644, "step": 819 }, { "epoch": 1.5738963531669867, "grad_norm": 0.5425280332565308, "learning_rate": 5.446289005775185e-06, "loss": 0.3597, "step": 820 }, { "epoch": 1.5758157389635317, "grad_norm": 0.5394629836082458, "learning_rate": 5.435160400372653e-06, "loss": 0.3355, "step": 821 }, { "epoch": 1.5777351247600768, "grad_norm": 0.5311486721038818, "learning_rate": 5.4240296223775465e-06, "loss": 0.3813, "step": 822 }, { "epoch": 1.579654510556622, "grad_norm": 0.6040675044059753, "learning_rate": 5.412896727361663e-06, "loss": 0.3814, "step": 823 }, { "epoch": 1.581573896353167, "grad_norm": 0.502105176448822, "learning_rate": 5.401761770907368e-06, "loss": 0.383, "step": 824 }, { "epoch": 1.583493282149712, "grad_norm": 0.489644318819046, "learning_rate": 5.390624808607321e-06, "loss": 0.3662, "step": 825 }, { "epoch": 1.5854126679462572, "grad_norm": 0.5067335367202759, "learning_rate": 5.3794858960641945e-06, "loss": 0.3664, "step": 826 }, { "epoch": 1.5873320537428022, "grad_norm": 0.6188616156578064, "learning_rate": 5.368345088890401e-06, "loss": 0.3993, "step": 827 }, { "epoch": 1.5892514395393476, "grad_norm": 0.47886374592781067, "learning_rate": 5.35720244270781e-06, "loss": 0.374, "step": 828 }, { "epoch": 1.5911708253358925, "grad_norm": 0.535666286945343, "learning_rate": 5.34605801314747e-06, "loss": 0.3449, "step": 829 }, { "epoch": 1.5930902111324377, "grad_norm": 0.557597279548645, "learning_rate": 5.334911855849334e-06, "loss": 0.3608, "step": 830 }, { "epoch": 1.5950095969289828, "grad_norm": 0.4905683994293213, "learning_rate": 5.323764026461988e-06, "loss": 0.3611, "step": 831 }, { "epoch": 1.5969289827255277, "grad_norm": 0.4566667675971985, "learning_rate": 5.312614580642358e-06, "loss": 0.3353, "step": 832 }, { "epoch": 1.598848368522073, "grad_norm": 0.5605169534683228, "learning_rate": 5.301463574055441e-06, "loss": 0.3398, "step": 833 }, { "epoch": 1.600767754318618, "grad_norm": 0.490882933139801, "learning_rate": 5.290311062374031e-06, "loss": 0.3525, "step": 834 }, { "epoch": 1.602687140115163, "grad_norm": 0.49171173572540283, "learning_rate": 5.279157101278433e-06, "loss": 0.3571, "step": 835 }, { "epoch": 1.6046065259117084, "grad_norm": 0.5016440153121948, "learning_rate": 5.268001746456187e-06, "loss": 0.3617, "step": 836 }, { "epoch": 1.6065259117082533, "grad_norm": 0.5086113810539246, "learning_rate": 5.256845053601795e-06, "loss": 0.3542, "step": 837 }, { "epoch": 1.6084452975047985, "grad_norm": 0.5947433710098267, "learning_rate": 5.245687078416437e-06, "loss": 0.37, "step": 838 }, { "epoch": 1.6103646833013436, "grad_norm": 0.5029972195625305, "learning_rate": 5.234527876607698e-06, "loss": 0.3465, "step": 839 }, { "epoch": 1.6122840690978886, "grad_norm": 0.5203180313110352, "learning_rate": 5.2233675038892815e-06, "loss": 0.3786, "step": 840 }, { "epoch": 1.6142034548944337, "grad_norm": 0.5228397250175476, "learning_rate": 5.212206015980742e-06, "loss": 0.3507, "step": 841 }, { "epoch": 1.616122840690979, "grad_norm": 0.5539453029632568, "learning_rate": 5.201043468607199e-06, "loss": 0.3475, "step": 842 }, { "epoch": 1.6180422264875238, "grad_norm": 0.5815761685371399, "learning_rate": 5.189879917499067e-06, "loss": 0.3773, "step": 843 }, { "epoch": 1.6199616122840692, "grad_norm": 0.6207556128501892, "learning_rate": 5.178715418391761e-06, "loss": 0.3535, "step": 844 }, { "epoch": 1.6218809980806141, "grad_norm": 0.5413594245910645, "learning_rate": 5.1675500270254385e-06, "loss": 0.3632, "step": 845 }, { "epoch": 1.6238003838771593, "grad_norm": 0.5290043950080872, "learning_rate": 5.156383799144706e-06, "loss": 0.3756, "step": 846 }, { "epoch": 1.6257197696737045, "grad_norm": 0.5995103120803833, "learning_rate": 5.145216790498355e-06, "loss": 0.3522, "step": 847 }, { "epoch": 1.6276391554702494, "grad_norm": 0.654389500617981, "learning_rate": 5.134049056839062e-06, "loss": 0.3636, "step": 848 }, { "epoch": 1.6295585412667948, "grad_norm": 0.5937809944152832, "learning_rate": 5.122880653923134e-06, "loss": 0.3706, "step": 849 }, { "epoch": 1.6314779270633397, "grad_norm": 0.6210635304450989, "learning_rate": 5.111711637510216e-06, "loss": 0.3569, "step": 850 }, { "epoch": 1.6333973128598849, "grad_norm": 0.6012803912162781, "learning_rate": 5.100542063363013e-06, "loss": 0.37, "step": 851 }, { "epoch": 1.63531669865643, "grad_norm": 0.5639196634292603, "learning_rate": 5.0893719872470194e-06, "loss": 0.3307, "step": 852 }, { "epoch": 1.637236084452975, "grad_norm": 0.579879641532898, "learning_rate": 5.07820146493023e-06, "loss": 0.3712, "step": 853 }, { "epoch": 1.6391554702495201, "grad_norm": 0.627318799495697, "learning_rate": 5.067030552182874e-06, "loss": 0.3675, "step": 854 }, { "epoch": 1.6410748560460653, "grad_norm": 0.5440316796302795, "learning_rate": 5.055859304777127e-06, "loss": 0.3655, "step": 855 }, { "epoch": 1.6429942418426102, "grad_norm": 0.49604159593582153, "learning_rate": 5.044687778486834e-06, "loss": 0.3623, "step": 856 }, { "epoch": 1.6449136276391556, "grad_norm": 0.6087161302566528, "learning_rate": 5.033516029087231e-06, "loss": 0.3603, "step": 857 }, { "epoch": 1.6468330134357005, "grad_norm": 0.6035653352737427, "learning_rate": 5.022344112354673e-06, "loss": 0.3791, "step": 858 }, { "epoch": 1.6487523992322457, "grad_norm": 0.46521738171577454, "learning_rate": 5.011172084066349e-06, "loss": 0.3634, "step": 859 }, { "epoch": 1.6506717850287909, "grad_norm": 0.5060005784034729, "learning_rate": 5e-06, "loss": 0.3563, "step": 860 }, { "epoch": 1.6525911708253358, "grad_norm": 0.5457319617271423, "learning_rate": 4.988827915933652e-06, "loss": 0.3574, "step": 861 }, { "epoch": 1.654510556621881, "grad_norm": 0.5421509146690369, "learning_rate": 4.977655887645328e-06, "loss": 0.3595, "step": 862 }, { "epoch": 1.6564299424184261, "grad_norm": 0.500660240650177, "learning_rate": 4.966483970912769e-06, "loss": 0.3638, "step": 863 }, { "epoch": 1.658349328214971, "grad_norm": 0.5261163115501404, "learning_rate": 4.955312221513168e-06, "loss": 0.3639, "step": 864 }, { "epoch": 1.6602687140115164, "grad_norm": 0.5300873517990112, "learning_rate": 4.944140695222874e-06, "loss": 0.3555, "step": 865 }, { "epoch": 1.6621880998080614, "grad_norm": 0.5503329038619995, "learning_rate": 4.932969447817127e-06, "loss": 0.345, "step": 866 }, { "epoch": 1.6641074856046065, "grad_norm": 0.489361435174942, "learning_rate": 4.92179853506977e-06, "loss": 0.3732, "step": 867 }, { "epoch": 1.6660268714011517, "grad_norm": 0.5050930976867676, "learning_rate": 4.910628012752982e-06, "loss": 0.3379, "step": 868 }, { "epoch": 1.6679462571976966, "grad_norm": 0.5649816989898682, "learning_rate": 4.899457936636988e-06, "loss": 0.3464, "step": 869 }, { "epoch": 1.669865642994242, "grad_norm": 0.5544465780258179, "learning_rate": 4.888288362489786e-06, "loss": 0.3691, "step": 870 }, { "epoch": 1.671785028790787, "grad_norm": 0.5151294469833374, "learning_rate": 4.877119346076868e-06, "loss": 0.3798, "step": 871 }, { "epoch": 1.673704414587332, "grad_norm": 0.4968414306640625, "learning_rate": 4.865950943160938e-06, "loss": 0.3376, "step": 872 }, { "epoch": 1.6756238003838773, "grad_norm": 0.5651934742927551, "learning_rate": 4.854783209501646e-06, "loss": 0.3666, "step": 873 }, { "epoch": 1.6775431861804222, "grad_norm": 0.48310866951942444, "learning_rate": 4.843616200855295e-06, "loss": 0.3475, "step": 874 }, { "epoch": 1.6794625719769674, "grad_norm": 0.5078393816947937, "learning_rate": 4.832449972974564e-06, "loss": 0.36, "step": 875 }, { "epoch": 1.6813819577735125, "grad_norm": 0.48573625087738037, "learning_rate": 4.82128458160824e-06, "loss": 0.365, "step": 876 }, { "epoch": 1.6833013435700575, "grad_norm": 0.5191183090209961, "learning_rate": 4.810120082500934e-06, "loss": 0.3561, "step": 877 }, { "epoch": 1.6852207293666028, "grad_norm": 0.4595150351524353, "learning_rate": 4.7989565313928015e-06, "loss": 0.3666, "step": 878 }, { "epoch": 1.6871401151631478, "grad_norm": 0.4512834846973419, "learning_rate": 4.78779398401926e-06, "loss": 0.3751, "step": 879 }, { "epoch": 1.689059500959693, "grad_norm": 0.502501368522644, "learning_rate": 4.776632496110721e-06, "loss": 0.3521, "step": 880 }, { "epoch": 1.690978886756238, "grad_norm": 0.4949384927749634, "learning_rate": 4.765472123392304e-06, "loss": 0.3807, "step": 881 }, { "epoch": 1.692898272552783, "grad_norm": 0.47582072019577026, "learning_rate": 4.754312921583564e-06, "loss": 0.3592, "step": 882 }, { "epoch": 1.6948176583493282, "grad_norm": 0.5404005646705627, "learning_rate": 4.743154946398207e-06, "loss": 0.3559, "step": 883 }, { "epoch": 1.6967370441458733, "grad_norm": 0.5554011464118958, "learning_rate": 4.7319982535438156e-06, "loss": 0.3726, "step": 884 }, { "epoch": 1.6986564299424183, "grad_norm": 0.51959228515625, "learning_rate": 4.720842898721569e-06, "loss": 0.3715, "step": 885 }, { "epoch": 1.7005758157389637, "grad_norm": 0.5188332796096802, "learning_rate": 4.70968893762597e-06, "loss": 0.3498, "step": 886 }, { "epoch": 1.7024952015355086, "grad_norm": 0.5403158664703369, "learning_rate": 4.698536425944561e-06, "loss": 0.3448, "step": 887 }, { "epoch": 1.7044145873320538, "grad_norm": 0.5985568165779114, "learning_rate": 4.687385419357644e-06, "loss": 0.383, "step": 888 }, { "epoch": 1.706333973128599, "grad_norm": 0.5150626301765442, "learning_rate": 4.6762359735380135e-06, "loss": 0.3815, "step": 889 }, { "epoch": 1.7082533589251438, "grad_norm": 0.49250635504722595, "learning_rate": 4.665088144150666e-06, "loss": 0.3473, "step": 890 }, { "epoch": 1.710172744721689, "grad_norm": 0.5833713412284851, "learning_rate": 4.653941986852533e-06, "loss": 0.3519, "step": 891 }, { "epoch": 1.7120921305182342, "grad_norm": 0.5237048864364624, "learning_rate": 4.642797557292193e-06, "loss": 0.3606, "step": 892 }, { "epoch": 1.714011516314779, "grad_norm": 0.5467256307601929, "learning_rate": 4.6316549111096e-06, "loss": 0.3691, "step": 893 }, { "epoch": 1.7159309021113245, "grad_norm": 0.47231540083885193, "learning_rate": 4.6205141039358055e-06, "loss": 0.3591, "step": 894 }, { "epoch": 1.7178502879078694, "grad_norm": 0.4774062931537628, "learning_rate": 4.60937519139268e-06, "loss": 0.3508, "step": 895 }, { "epoch": 1.7197696737044146, "grad_norm": 0.5198578238487244, "learning_rate": 4.598238229092634e-06, "loss": 0.3442, "step": 896 }, { "epoch": 1.7216890595009597, "grad_norm": 0.5240543484687805, "learning_rate": 4.587103272638339e-06, "loss": 0.3833, "step": 897 }, { "epoch": 1.7236084452975047, "grad_norm": 0.5041773915290833, "learning_rate": 4.575970377622456e-06, "loss": 0.3712, "step": 898 }, { "epoch": 1.72552783109405, "grad_norm": 0.4888068735599518, "learning_rate": 4.564839599627347e-06, "loss": 0.343, "step": 899 }, { "epoch": 1.727447216890595, "grad_norm": 0.48213711380958557, "learning_rate": 4.553710994224816e-06, "loss": 0.3638, "step": 900 }, { "epoch": 1.7293666026871402, "grad_norm": 0.4732985198497772, "learning_rate": 4.542584616975811e-06, "loss": 0.3267, "step": 901 }, { "epoch": 1.7312859884836853, "grad_norm": 0.5251859426498413, "learning_rate": 4.531460523430157e-06, "loss": 0.3531, "step": 902 }, { "epoch": 1.7332053742802302, "grad_norm": 0.5464438199996948, "learning_rate": 4.5203387691262774e-06, "loss": 0.3694, "step": 903 }, { "epoch": 1.7351247600767754, "grad_norm": 0.49600520730018616, "learning_rate": 4.509219409590922e-06, "loss": 0.3371, "step": 904 }, { "epoch": 1.7370441458733206, "grad_norm": 0.4981859028339386, "learning_rate": 4.498102500338879e-06, "loss": 0.3402, "step": 905 }, { "epoch": 1.7389635316698655, "grad_norm": 0.525546133518219, "learning_rate": 4.486988096872701e-06, "loss": 0.3734, "step": 906 }, { "epoch": 1.7408829174664109, "grad_norm": 0.49170613288879395, "learning_rate": 4.475876254682436e-06, "loss": 0.3787, "step": 907 }, { "epoch": 1.7428023032629558, "grad_norm": 0.5139739513397217, "learning_rate": 4.464767029245341e-06, "loss": 0.355, "step": 908 }, { "epoch": 1.744721689059501, "grad_norm": 0.4535714387893677, "learning_rate": 4.453660476025612e-06, "loss": 0.38, "step": 909 }, { "epoch": 1.7466410748560461, "grad_norm": 0.48025500774383545, "learning_rate": 4.442556650474099e-06, "loss": 0.3605, "step": 910 }, { "epoch": 1.748560460652591, "grad_norm": 0.5039131045341492, "learning_rate": 4.431455608028038e-06, "loss": 0.3627, "step": 911 }, { "epoch": 1.7504798464491362, "grad_norm": 0.49448612332344055, "learning_rate": 4.420357404110765e-06, "loss": 0.3536, "step": 912 }, { "epoch": 1.7523992322456814, "grad_norm": 0.4768318235874176, "learning_rate": 4.40926209413145e-06, "loss": 0.358, "step": 913 }, { "epoch": 1.7543186180422263, "grad_norm": 0.5278033018112183, "learning_rate": 4.398169733484811e-06, "loss": 0.3448, "step": 914 }, { "epoch": 1.7562380038387717, "grad_norm": 0.4738544225692749, "learning_rate": 4.387080377550843e-06, "loss": 0.3617, "step": 915 }, { "epoch": 1.7581573896353166, "grad_norm": 0.5066388249397278, "learning_rate": 4.375994081694535e-06, "loss": 0.3462, "step": 916 }, { "epoch": 1.7600767754318618, "grad_norm": 0.47527310252189636, "learning_rate": 4.364910901265607e-06, "loss": 0.3699, "step": 917 }, { "epoch": 1.761996161228407, "grad_norm": 0.4713718593120575, "learning_rate": 4.353830891598216e-06, "loss": 0.3694, "step": 918 }, { "epoch": 1.763915547024952, "grad_norm": 0.4779852032661438, "learning_rate": 4.342754108010695e-06, "loss": 0.3402, "step": 919 }, { "epoch": 1.7658349328214973, "grad_norm": 0.48042553663253784, "learning_rate": 4.331680605805264e-06, "loss": 0.3566, "step": 920 }, { "epoch": 1.7677543186180422, "grad_norm": 0.49362003803253174, "learning_rate": 4.320610440267766e-06, "loss": 0.3519, "step": 921 }, { "epoch": 1.7696737044145874, "grad_norm": 0.4843592047691345, "learning_rate": 4.309543666667385e-06, "loss": 0.3481, "step": 922 }, { "epoch": 1.7715930902111325, "grad_norm": 0.4950464963912964, "learning_rate": 4.298480340256365e-06, "loss": 0.3629, "step": 923 }, { "epoch": 1.7735124760076775, "grad_norm": 0.47693338990211487, "learning_rate": 4.287420516269745e-06, "loss": 0.3658, "step": 924 }, { "epoch": 1.7754318618042226, "grad_norm": 0.4958014488220215, "learning_rate": 4.2763642499250765e-06, "loss": 0.3666, "step": 925 }, { "epoch": 1.7773512476007678, "grad_norm": 0.5680451393127441, "learning_rate": 4.265311596422147e-06, "loss": 0.3272, "step": 926 }, { "epoch": 1.7792706333973127, "grad_norm": 0.512785792350769, "learning_rate": 4.254262610942707e-06, "loss": 0.3576, "step": 927 }, { "epoch": 1.781190019193858, "grad_norm": 0.46451514959335327, "learning_rate": 4.243217348650197e-06, "loss": 0.3624, "step": 928 }, { "epoch": 1.783109404990403, "grad_norm": 0.5016498565673828, "learning_rate": 4.232175864689464e-06, "loss": 0.3659, "step": 929 }, { "epoch": 1.7850287907869482, "grad_norm": 0.47316527366638184, "learning_rate": 4.221138214186493e-06, "loss": 0.3823, "step": 930 }, { "epoch": 1.7869481765834934, "grad_norm": 0.4964522421360016, "learning_rate": 4.210104452248135e-06, "loss": 0.3541, "step": 931 }, { "epoch": 1.7888675623800383, "grad_norm": 0.47671929001808167, "learning_rate": 4.199074633961822e-06, "loss": 0.346, "step": 932 }, { "epoch": 1.7907869481765835, "grad_norm": 0.473956823348999, "learning_rate": 4.188048814395293e-06, "loss": 0.3525, "step": 933 }, { "epoch": 1.7927063339731286, "grad_norm": 0.4765279293060303, "learning_rate": 4.17702704859633e-06, "loss": 0.3732, "step": 934 }, { "epoch": 1.7946257197696736, "grad_norm": 0.46462586522102356, "learning_rate": 4.166009391592476e-06, "loss": 0.3701, "step": 935 }, { "epoch": 1.796545105566219, "grad_norm": 0.4972460865974426, "learning_rate": 4.154995898390756e-06, "loss": 0.368, "step": 936 }, { "epoch": 1.7984644913627639, "grad_norm": 0.4621984660625458, "learning_rate": 4.1439866239774065e-06, "loss": 0.352, "step": 937 }, { "epoch": 1.800383877159309, "grad_norm": 0.4703991115093231, "learning_rate": 4.132981623317606e-06, "loss": 0.3751, "step": 938 }, { "epoch": 1.8023032629558542, "grad_norm": 0.5410139560699463, "learning_rate": 4.12198095135519e-06, "loss": 0.3378, "step": 939 }, { "epoch": 1.8042226487523991, "grad_norm": 0.5196885466575623, "learning_rate": 4.110984663012388e-06, "loss": 0.36, "step": 940 }, { "epoch": 1.8061420345489443, "grad_norm": 0.5235639810562134, "learning_rate": 4.09999281318954e-06, "loss": 0.3526, "step": 941 }, { "epoch": 1.8080614203454894, "grad_norm": 0.5446470379829407, "learning_rate": 4.089005456764828e-06, "loss": 0.3791, "step": 942 }, { "epoch": 1.8099808061420346, "grad_norm": 0.5020136833190918, "learning_rate": 4.078022648593997e-06, "loss": 0.3771, "step": 943 }, { "epoch": 1.8119001919385798, "grad_norm": 0.5756138563156128, "learning_rate": 4.06704444351009e-06, "loss": 0.3604, "step": 944 }, { "epoch": 1.8138195777351247, "grad_norm": 0.5533238649368286, "learning_rate": 4.056070896323163e-06, "loss": 0.3541, "step": 945 }, { "epoch": 1.8157389635316699, "grad_norm": 0.451406329870224, "learning_rate": 4.0451020618200196e-06, "loss": 0.3888, "step": 946 }, { "epoch": 1.817658349328215, "grad_norm": 0.46836549043655396, "learning_rate": 4.034137994763934e-06, "loss": 0.339, "step": 947 }, { "epoch": 1.81957773512476, "grad_norm": 0.5029658675193787, "learning_rate": 4.0231787498943785e-06, "loss": 0.3737, "step": 948 }, { "epoch": 1.8214971209213053, "grad_norm": 0.49353134632110596, "learning_rate": 4.01222438192675e-06, "loss": 0.3534, "step": 949 }, { "epoch": 1.8234165067178503, "grad_norm": 0.48148056864738464, "learning_rate": 4.001274945552098e-06, "loss": 0.3722, "step": 950 }, { "epoch": 1.8253358925143954, "grad_norm": 0.5020965337753296, "learning_rate": 3.990330495436848e-06, "loss": 0.3541, "step": 951 }, { "epoch": 1.8272552783109406, "grad_norm": 0.5725180506706238, "learning_rate": 3.979391086222531e-06, "loss": 0.3812, "step": 952 }, { "epoch": 1.8291746641074855, "grad_norm": 0.4800430238246918, "learning_rate": 3.968456772525515e-06, "loss": 0.3707, "step": 953 }, { "epoch": 1.8310940499040307, "grad_norm": 0.4821717143058777, "learning_rate": 3.9575276089367236e-06, "loss": 0.3558, "step": 954 }, { "epoch": 1.8330134357005758, "grad_norm": 0.5291041135787964, "learning_rate": 3.94660365002137e-06, "loss": 0.3531, "step": 955 }, { "epoch": 1.8349328214971208, "grad_norm": 0.5708112120628357, "learning_rate": 3.935684950318679e-06, "loss": 0.351, "step": 956 }, { "epoch": 1.8368522072936662, "grad_norm": 0.5347475409507751, "learning_rate": 3.924771564341621e-06, "loss": 0.3663, "step": 957 }, { "epoch": 1.838771593090211, "grad_norm": 0.46430549025535583, "learning_rate": 3.91386354657664e-06, "loss": 0.3627, "step": 958 }, { "epoch": 1.8406909788867563, "grad_norm": 0.4990037679672241, "learning_rate": 3.902960951483375e-06, "loss": 0.3583, "step": 959 }, { "epoch": 1.8426103646833014, "grad_norm": 0.5649574995040894, "learning_rate": 3.892063833494387e-06, "loss": 0.3662, "step": 960 }, { "epoch": 1.8445297504798464, "grad_norm": 0.5024659037590027, "learning_rate": 3.881172247014899e-06, "loss": 0.3525, "step": 961 }, { "epoch": 1.8464491362763915, "grad_norm": 0.5161370635032654, "learning_rate": 3.870286246422519e-06, "loss": 0.3828, "step": 962 }, { "epoch": 1.8483685220729367, "grad_norm": 0.5517075061798096, "learning_rate": 3.859405886066959e-06, "loss": 0.3462, "step": 963 }, { "epoch": 1.8502879078694816, "grad_norm": 0.4517953097820282, "learning_rate": 3.848531220269775e-06, "loss": 0.3551, "step": 964 }, { "epoch": 1.852207293666027, "grad_norm": 0.48516982793807983, "learning_rate": 3.837662303324093e-06, "loss": 0.3791, "step": 965 }, { "epoch": 1.854126679462572, "grad_norm": 0.5237125158309937, "learning_rate": 3.826799189494336e-06, "loss": 0.3443, "step": 966 }, { "epoch": 1.856046065259117, "grad_norm": 0.5011300444602966, "learning_rate": 3.815941933015956e-06, "loss": 0.3626, "step": 967 }, { "epoch": 1.8579654510556622, "grad_norm": 0.4510292708873749, "learning_rate": 3.805090588095159e-06, "loss": 0.3899, "step": 968 }, { "epoch": 1.8598848368522072, "grad_norm": 0.45733538269996643, "learning_rate": 3.794245208908639e-06, "loss": 0.3413, "step": 969 }, { "epoch": 1.8618042226487526, "grad_norm": 0.4739803969860077, "learning_rate": 3.783405849603302e-06, "loss": 0.3925, "step": 970 }, { "epoch": 1.8637236084452975, "grad_norm": 0.4972767233848572, "learning_rate": 3.7725725642960047e-06, "loss": 0.3456, "step": 971 }, { "epoch": 1.8656429942418427, "grad_norm": 0.5183765888214111, "learning_rate": 3.7617454070732734e-06, "loss": 0.3878, "step": 972 }, { "epoch": 1.8675623800383878, "grad_norm": 0.4547247290611267, "learning_rate": 3.750924431991041e-06, "loss": 0.382, "step": 973 }, { "epoch": 1.8694817658349328, "grad_norm": 0.44835424423217773, "learning_rate": 3.7401096930743753e-06, "loss": 0.3752, "step": 974 }, { "epoch": 1.871401151631478, "grad_norm": 0.4450339674949646, "learning_rate": 3.729301244317208e-06, "loss": 0.3482, "step": 975 }, { "epoch": 1.873320537428023, "grad_norm": 0.5184307098388672, "learning_rate": 3.7184991396820703e-06, "loss": 0.3612, "step": 976 }, { "epoch": 1.875239923224568, "grad_norm": 0.46870970726013184, "learning_rate": 3.7077034330998154e-06, "loss": 0.3622, "step": 977 }, { "epoch": 1.8771593090211134, "grad_norm": 0.47299548983573914, "learning_rate": 3.6969141784693546e-06, "loss": 0.3633, "step": 978 }, { "epoch": 1.8790786948176583, "grad_norm": 0.47452953457832336, "learning_rate": 3.686131429657387e-06, "loss": 0.3677, "step": 979 }, { "epoch": 1.8809980806142035, "grad_norm": 0.5392640829086304, "learning_rate": 3.675355240498133e-06, "loss": 0.3452, "step": 980 }, { "epoch": 1.8829174664107486, "grad_norm": 0.5191397070884705, "learning_rate": 3.6645856647930593e-06, "loss": 0.3737, "step": 981 }, { "epoch": 1.8848368522072936, "grad_norm": 0.5295866131782532, "learning_rate": 3.6538227563106168e-06, "loss": 0.3612, "step": 982 }, { "epoch": 1.8867562380038387, "grad_norm": 0.45304420590400696, "learning_rate": 3.643066568785969e-06, "loss": 0.3405, "step": 983 }, { "epoch": 1.888675623800384, "grad_norm": 0.5126438140869141, "learning_rate": 3.6323171559207193e-06, "loss": 0.3575, "step": 984 }, { "epoch": 1.8905950095969288, "grad_norm": 0.5236092209815979, "learning_rate": 3.6215745713826585e-06, "loss": 0.3518, "step": 985 }, { "epoch": 1.8925143953934742, "grad_norm": 0.4707767963409424, "learning_rate": 3.6108388688054773e-06, "loss": 0.3804, "step": 986 }, { "epoch": 1.8944337811900192, "grad_norm": 0.4828234910964966, "learning_rate": 3.6001101017885086e-06, "loss": 0.3777, "step": 987 }, { "epoch": 1.8963531669865643, "grad_norm": 0.48757752776145935, "learning_rate": 3.589388323896459e-06, "loss": 0.3719, "step": 988 }, { "epoch": 1.8982725527831095, "grad_norm": 0.5175418257713318, "learning_rate": 3.578673588659145e-06, "loss": 0.3495, "step": 989 }, { "epoch": 1.9001919385796544, "grad_norm": 0.5424110293388367, "learning_rate": 3.567965949571219e-06, "loss": 0.3442, "step": 990 }, { "epoch": 1.9021113243761996, "grad_norm": 0.4497435986995697, "learning_rate": 3.557265460091902e-06, "loss": 0.3421, "step": 991 }, { "epoch": 1.9040307101727447, "grad_norm": 0.4866499602794647, "learning_rate": 3.5465721736447236e-06, "loss": 0.3728, "step": 992 }, { "epoch": 1.9059500959692899, "grad_norm": 0.4973314106464386, "learning_rate": 3.5358861436172487e-06, "loss": 0.3618, "step": 993 }, { "epoch": 1.907869481765835, "grad_norm": 0.4718598425388336, "learning_rate": 3.5252074233608175e-06, "loss": 0.3614, "step": 994 }, { "epoch": 1.90978886756238, "grad_norm": 0.46350330114364624, "learning_rate": 3.5145360661902717e-06, "loss": 0.3438, "step": 995 }, { "epoch": 1.9117082533589251, "grad_norm": 0.47142237424850464, "learning_rate": 3.503872125383693e-06, "loss": 0.3562, "step": 996 }, { "epoch": 1.9136276391554703, "grad_norm": 0.5113832950592041, "learning_rate": 3.493215654182134e-06, "loss": 0.3323, "step": 997 }, { "epoch": 1.9155470249520152, "grad_norm": 0.5358283519744873, "learning_rate": 3.48256670578936e-06, "loss": 0.3721, "step": 998 }, { "epoch": 1.9174664107485606, "grad_norm": 0.5174738764762878, "learning_rate": 3.471925333371572e-06, "loss": 0.345, "step": 999 }, { "epoch": 1.9193857965451055, "grad_norm": 0.5600181818008423, "learning_rate": 3.4612915900571493e-06, "loss": 0.3521, "step": 1000 }, { "epoch": 1.9213051823416507, "grad_norm": 0.49096786975860596, "learning_rate": 3.4506655289363815e-06, "loss": 0.3739, "step": 1001 }, { "epoch": 1.9232245681381959, "grad_norm": 0.47550779581069946, "learning_rate": 3.4400472030612035e-06, "loss": 0.3341, "step": 1002 }, { "epoch": 1.9251439539347408, "grad_norm": 0.5044779181480408, "learning_rate": 3.429436665444934e-06, "loss": 0.3495, "step": 1003 }, { "epoch": 1.927063339731286, "grad_norm": 0.46287402510643005, "learning_rate": 3.4188339690620033e-06, "loss": 0.3727, "step": 1004 }, { "epoch": 1.9289827255278311, "grad_norm": 0.5236227512359619, "learning_rate": 3.408239166847696e-06, "loss": 0.3458, "step": 1005 }, { "epoch": 1.930902111324376, "grad_norm": 0.4995711147785187, "learning_rate": 3.397652311697883e-06, "loss": 0.3491, "step": 1006 }, { "epoch": 1.9328214971209214, "grad_norm": 0.5061910152435303, "learning_rate": 3.387073456468761e-06, "loss": 0.3724, "step": 1007 }, { "epoch": 1.9347408829174664, "grad_norm": 0.5521465539932251, "learning_rate": 3.3765026539765832e-06, "loss": 0.3625, "step": 1008 }, { "epoch": 1.9366602687140115, "grad_norm": 0.49945539236068726, "learning_rate": 3.365939956997399e-06, "loss": 0.3641, "step": 1009 }, { "epoch": 1.9385796545105567, "grad_norm": 0.49057915806770325, "learning_rate": 3.355385418266792e-06, "loss": 0.3779, "step": 1010 }, { "epoch": 1.9404990403071016, "grad_norm": 0.5111267566680908, "learning_rate": 3.344839090479609e-06, "loss": 0.3718, "step": 1011 }, { "epoch": 1.9424184261036468, "grad_norm": 0.502008855342865, "learning_rate": 3.3343010262897125e-06, "loss": 0.372, "step": 1012 }, { "epoch": 1.944337811900192, "grad_norm": 0.4915367662906647, "learning_rate": 3.3237712783097003e-06, "loss": 0.3887, "step": 1013 }, { "epoch": 1.9462571976967369, "grad_norm": 0.4790264666080475, "learning_rate": 3.3132498991106515e-06, "loss": 0.3593, "step": 1014 }, { "epoch": 1.9481765834932823, "grad_norm": 0.48164817690849304, "learning_rate": 3.3027369412218623e-06, "loss": 0.36, "step": 1015 }, { "epoch": 1.9500959692898272, "grad_norm": 0.4421576261520386, "learning_rate": 3.2922324571305908e-06, "loss": 0.3484, "step": 1016 }, { "epoch": 1.9520153550863724, "grad_norm": 0.46843478083610535, "learning_rate": 3.2817364992817835e-06, "loss": 0.3491, "step": 1017 }, { "epoch": 1.9539347408829175, "grad_norm": 0.48605877161026, "learning_rate": 3.2712491200778152e-06, "loss": 0.3461, "step": 1018 }, { "epoch": 1.9558541266794625, "grad_norm": 0.47587305307388306, "learning_rate": 3.260770371878236e-06, "loss": 0.3529, "step": 1019 }, { "epoch": 1.9577735124760078, "grad_norm": 0.43997547030448914, "learning_rate": 3.2503003069995057e-06, "loss": 0.3662, "step": 1020 }, { "epoch": 1.9596928982725528, "grad_norm": 0.48544999957084656, "learning_rate": 3.239838977714728e-06, "loss": 0.344, "step": 1021 }, { "epoch": 1.961612284069098, "grad_norm": 0.49937525391578674, "learning_rate": 3.2293864362533923e-06, "loss": 0.3255, "step": 1022 }, { "epoch": 1.963531669865643, "grad_norm": 0.45761093497276306, "learning_rate": 3.2189427348011174e-06, "loss": 0.3334, "step": 1023 }, { "epoch": 1.965451055662188, "grad_norm": 0.5440167188644409, "learning_rate": 3.2085079254993845e-06, "loss": 0.37, "step": 1024 }, { "epoch": 1.9673704414587332, "grad_norm": 0.5139799118041992, "learning_rate": 3.198082060445281e-06, "loss": 0.3702, "step": 1025 }, { "epoch": 1.9692898272552783, "grad_norm": 0.4792081117630005, "learning_rate": 3.1876651916912395e-06, "loss": 0.3505, "step": 1026 }, { "epoch": 1.9712092130518233, "grad_norm": 0.4661388695240021, "learning_rate": 3.1772573712447753e-06, "loss": 0.382, "step": 1027 }, { "epoch": 1.9731285988483687, "grad_norm": 0.5419757962226868, "learning_rate": 3.1668586510682287e-06, "loss": 0.3406, "step": 1028 }, { "epoch": 1.9750479846449136, "grad_norm": 0.46002650260925293, "learning_rate": 3.1564690830785106e-06, "loss": 0.3738, "step": 1029 }, { "epoch": 1.9769673704414588, "grad_norm": 0.47731947898864746, "learning_rate": 3.1460887191468324e-06, "loss": 0.3829, "step": 1030 }, { "epoch": 1.978886756238004, "grad_norm": 0.5072636008262634, "learning_rate": 3.1357176110984578e-06, "loss": 0.3573, "step": 1031 }, { "epoch": 1.9808061420345489, "grad_norm": 0.5447486639022827, "learning_rate": 3.1253558107124354e-06, "loss": 0.3526, "step": 1032 }, { "epoch": 1.982725527831094, "grad_norm": 0.4980373680591583, "learning_rate": 3.115003369721346e-06, "loss": 0.3708, "step": 1033 }, { "epoch": 1.9846449136276392, "grad_norm": 0.4977683126926422, "learning_rate": 3.104660339811044e-06, "loss": 0.342, "step": 1034 }, { "epoch": 1.986564299424184, "grad_norm": 0.5453054308891296, "learning_rate": 3.0943267726203965e-06, "loss": 0.3553, "step": 1035 }, { "epoch": 1.9884836852207295, "grad_norm": 0.5266937613487244, "learning_rate": 3.0840027197410245e-06, "loss": 0.3723, "step": 1036 }, { "epoch": 1.9904030710172744, "grad_norm": 0.5179019570350647, "learning_rate": 3.0736882327170502e-06, "loss": 0.3753, "step": 1037 }, { "epoch": 1.9923224568138196, "grad_norm": 0.45097804069519043, "learning_rate": 3.0633833630448378e-06, "loss": 0.3428, "step": 1038 }, { "epoch": 1.9942418426103647, "grad_norm": 0.4913027584552765, "learning_rate": 3.053088162172734e-06, "loss": 0.3534, "step": 1039 }, { "epoch": 1.9961612284069097, "grad_norm": 0.5047955513000488, "learning_rate": 3.042802681500814e-06, "loss": 0.344, "step": 1040 }, { "epoch": 1.9980806142034548, "grad_norm": 0.4735261797904968, "learning_rate": 3.0325269723806212e-06, "loss": 0.3891, "step": 1041 }, { "epoch": 2.0, "grad_norm": 0.47839510440826416, "learning_rate": 3.022261086114914e-06, "loss": 0.3615, "step": 1042 }, { "epoch": 2.001919385796545, "grad_norm": 0.5187305212020874, "learning_rate": 3.012005073957413e-06, "loss": 0.2965, "step": 1043 }, { "epoch": 2.0038387715930903, "grad_norm": 0.46550580859184265, "learning_rate": 3.0017589871125387e-06, "loss": 0.339, "step": 1044 }, { "epoch": 2.0057581573896353, "grad_norm": 0.4512456953525543, "learning_rate": 2.991522876735154e-06, "loss": 0.3441, "step": 1045 }, { "epoch": 2.0076775431861806, "grad_norm": 0.4873534142971039, "learning_rate": 2.9812967939303173e-06, "loss": 0.3065, "step": 1046 }, { "epoch": 2.0095969289827256, "grad_norm": 0.5146908760070801, "learning_rate": 2.9710807897530257e-06, "loss": 0.3253, "step": 1047 }, { "epoch": 2.0115163147792705, "grad_norm": 0.5006998181343079, "learning_rate": 2.9608749152079526e-06, "loss": 0.3185, "step": 1048 }, { "epoch": 2.013435700575816, "grad_norm": 0.5323864817619324, "learning_rate": 2.9506792212491987e-06, "loss": 0.3328, "step": 1049 }, { "epoch": 2.015355086372361, "grad_norm": 0.5378642082214355, "learning_rate": 2.9404937587800374e-06, "loss": 0.323, "step": 1050 }, { "epoch": 2.0172744721689058, "grad_norm": 0.5049008727073669, "learning_rate": 2.9303185786526617e-06, "loss": 0.3183, "step": 1051 }, { "epoch": 2.019193857965451, "grad_norm": 0.48577630519866943, "learning_rate": 2.920153731667928e-06, "loss": 0.3047, "step": 1052 }, { "epoch": 2.021113243761996, "grad_norm": 0.4985201060771942, "learning_rate": 2.9099992685751015e-06, "loss": 0.3052, "step": 1053 }, { "epoch": 2.0230326295585415, "grad_norm": 0.49699562788009644, "learning_rate": 2.8998552400716063e-06, "loss": 0.3145, "step": 1054 }, { "epoch": 2.0249520153550864, "grad_norm": 0.47644802927970886, "learning_rate": 2.889721696802768e-06, "loss": 0.3017, "step": 1055 }, { "epoch": 2.0268714011516313, "grad_norm": 0.4731806516647339, "learning_rate": 2.879598689361569e-06, "loss": 0.3014, "step": 1056 }, { "epoch": 2.0287907869481767, "grad_norm": 0.4593782424926758, "learning_rate": 2.8694862682883867e-06, "loss": 0.3222, "step": 1057 }, { "epoch": 2.0307101727447217, "grad_norm": 0.5299936532974243, "learning_rate": 2.859384484070741e-06, "loss": 0.3211, "step": 1058 }, { "epoch": 2.0326295585412666, "grad_norm": 0.45662248134613037, "learning_rate": 2.84929338714305e-06, "loss": 0.3131, "step": 1059 }, { "epoch": 2.034548944337812, "grad_norm": 0.49857136607170105, "learning_rate": 2.839213027886373e-06, "loss": 0.3113, "step": 1060 }, { "epoch": 2.036468330134357, "grad_norm": 0.4966824948787689, "learning_rate": 2.8291434566281654e-06, "loss": 0.3035, "step": 1061 }, { "epoch": 2.0383877159309023, "grad_norm": 0.521865963935852, "learning_rate": 2.819084723642015e-06, "loss": 0.315, "step": 1062 }, { "epoch": 2.0403071017274472, "grad_norm": 0.4518691599369049, "learning_rate": 2.809036879147401e-06, "loss": 0.3225, "step": 1063 }, { "epoch": 2.042226487523992, "grad_norm": 0.4607463777065277, "learning_rate": 2.7989999733094398e-06, "loss": 0.2825, "step": 1064 }, { "epoch": 2.0441458733205375, "grad_norm": 0.46626198291778564, "learning_rate": 2.7889740562386357e-06, "loss": 0.3234, "step": 1065 }, { "epoch": 2.0460652591170825, "grad_norm": 0.5046021342277527, "learning_rate": 2.7789591779906305e-06, "loss": 0.3066, "step": 1066 }, { "epoch": 2.047984644913628, "grad_norm": 0.47893571853637695, "learning_rate": 2.768955388565953e-06, "loss": 0.3153, "step": 1067 }, { "epoch": 2.049904030710173, "grad_norm": 0.4459957182407379, "learning_rate": 2.7589627379097693e-06, "loss": 0.3137, "step": 1068 }, { "epoch": 2.0518234165067177, "grad_norm": 0.47269561886787415, "learning_rate": 2.748981275911633e-06, "loss": 0.2953, "step": 1069 }, { "epoch": 2.053742802303263, "grad_norm": 0.45105835795402527, "learning_rate": 2.7390110524052415e-06, "loss": 0.306, "step": 1070 }, { "epoch": 2.055662188099808, "grad_norm": 0.45647212862968445, "learning_rate": 2.7290521171681772e-06, "loss": 0.3116, "step": 1071 }, { "epoch": 2.057581573896353, "grad_norm": 0.4257984459400177, "learning_rate": 2.7191045199216666e-06, "loss": 0.3175, "step": 1072 }, { "epoch": 2.0595009596928984, "grad_norm": 0.45926162600517273, "learning_rate": 2.709168310330329e-06, "loss": 0.3018, "step": 1073 }, { "epoch": 2.0614203454894433, "grad_norm": 0.456083744764328, "learning_rate": 2.699243538001931e-06, "loss": 0.3223, "step": 1074 }, { "epoch": 2.0633397312859887, "grad_norm": 0.44238269329071045, "learning_rate": 2.6893302524871357e-06, "loss": 0.311, "step": 1075 }, { "epoch": 2.0652591170825336, "grad_norm": 0.4631599485874176, "learning_rate": 2.6794285032792577e-06, "loss": 0.3108, "step": 1076 }, { "epoch": 2.0671785028790786, "grad_norm": 0.43326032161712646, "learning_rate": 2.6695383398140155e-06, "loss": 0.3029, "step": 1077 }, { "epoch": 2.069097888675624, "grad_norm": 0.44427675008773804, "learning_rate": 2.6596598114692814e-06, "loss": 0.2877, "step": 1078 }, { "epoch": 2.071017274472169, "grad_norm": 0.44275227189064026, "learning_rate": 2.6497929675648435e-06, "loss": 0.2965, "step": 1079 }, { "epoch": 2.072936660268714, "grad_norm": 0.46986308693885803, "learning_rate": 2.6399378573621493e-06, "loss": 0.3146, "step": 1080 }, { "epoch": 2.074856046065259, "grad_norm": 0.4541390538215637, "learning_rate": 2.6300945300640678e-06, "loss": 0.3212, "step": 1081 }, { "epoch": 2.076775431861804, "grad_norm": 0.4475952684879303, "learning_rate": 2.6202630348146323e-06, "loss": 0.3424, "step": 1082 }, { "epoch": 2.0786948176583495, "grad_norm": 0.44391414523124695, "learning_rate": 2.610443420698815e-06, "loss": 0.3201, "step": 1083 }, { "epoch": 2.0806142034548945, "grad_norm": 0.4814508259296417, "learning_rate": 2.600635736742262e-06, "loss": 0.3205, "step": 1084 }, { "epoch": 2.0825335892514394, "grad_norm": 0.41559576988220215, "learning_rate": 2.5908400319110588e-06, "loss": 0.3085, "step": 1085 }, { "epoch": 2.0844529750479848, "grad_norm": 0.4773661196231842, "learning_rate": 2.581056355111484e-06, "loss": 0.3103, "step": 1086 }, { "epoch": 2.0863723608445297, "grad_norm": 0.46356895565986633, "learning_rate": 2.5712847551897613e-06, "loss": 0.3362, "step": 1087 }, { "epoch": 2.0882917466410746, "grad_norm": 0.4625107944011688, "learning_rate": 2.5615252809318287e-06, "loss": 0.3179, "step": 1088 }, { "epoch": 2.09021113243762, "grad_norm": 0.44741785526275635, "learning_rate": 2.5517779810630725e-06, "loss": 0.3044, "step": 1089 }, { "epoch": 2.092130518234165, "grad_norm": 0.44088098406791687, "learning_rate": 2.5420429042481054e-06, "loss": 0.331, "step": 1090 }, { "epoch": 2.0940499040307103, "grad_norm": 0.4703546166419983, "learning_rate": 2.5323200990905106e-06, "loss": 0.3028, "step": 1091 }, { "epoch": 2.0959692898272553, "grad_norm": 0.4273625910282135, "learning_rate": 2.5226096141326095e-06, "loss": 0.3244, "step": 1092 }, { "epoch": 2.0978886756238, "grad_norm": 0.470930278301239, "learning_rate": 2.512911497855207e-06, "loss": 0.2876, "step": 1093 }, { "epoch": 2.0998080614203456, "grad_norm": 0.463149756193161, "learning_rate": 2.503225798677359e-06, "loss": 0.3141, "step": 1094 }, { "epoch": 2.1017274472168905, "grad_norm": 0.43880772590637207, "learning_rate": 2.493552564956126e-06, "loss": 0.3171, "step": 1095 }, { "epoch": 2.103646833013436, "grad_norm": 0.45119890570640564, "learning_rate": 2.483891844986334e-06, "loss": 0.311, "step": 1096 }, { "epoch": 2.105566218809981, "grad_norm": 0.42207521200180054, "learning_rate": 2.4742436870003326e-06, "loss": 0.3235, "step": 1097 }, { "epoch": 2.107485604606526, "grad_norm": 0.43191346526145935, "learning_rate": 2.464608139167754e-06, "loss": 0.3214, "step": 1098 }, { "epoch": 2.109404990403071, "grad_norm": 0.44787243008613586, "learning_rate": 2.4549852495952727e-06, "loss": 0.3294, "step": 1099 }, { "epoch": 2.111324376199616, "grad_norm": 0.46976569294929504, "learning_rate": 2.445375066326362e-06, "loss": 0.3098, "step": 1100 }, { "epoch": 2.113243761996161, "grad_norm": 0.4376424252986908, "learning_rate": 2.4357776373410656e-06, "loss": 0.3072, "step": 1101 }, { "epoch": 2.1151631477927064, "grad_norm": 0.44315680861473083, "learning_rate": 2.426193010555743e-06, "loss": 0.3212, "step": 1102 }, { "epoch": 2.1170825335892514, "grad_norm": 0.42132890224456787, "learning_rate": 2.4166212338228384e-06, "loss": 0.3096, "step": 1103 }, { "epoch": 2.1190019193857967, "grad_norm": 0.4394184648990631, "learning_rate": 2.4070623549306404e-06, "loss": 0.3172, "step": 1104 }, { "epoch": 2.1209213051823417, "grad_norm": 0.44283023476600647, "learning_rate": 2.3975164216030456e-06, "loss": 0.3413, "step": 1105 }, { "epoch": 2.1228406909788866, "grad_norm": 0.4421125650405884, "learning_rate": 2.3879834814993153e-06, "loss": 0.3006, "step": 1106 }, { "epoch": 2.124760076775432, "grad_norm": 0.46579116582870483, "learning_rate": 2.3784635822138424e-06, "loss": 0.3147, "step": 1107 }, { "epoch": 2.126679462571977, "grad_norm": 0.45992574095726013, "learning_rate": 2.368956771275912e-06, "loss": 0.3129, "step": 1108 }, { "epoch": 2.128598848368522, "grad_norm": 0.41321638226509094, "learning_rate": 2.3594630961494615e-06, "loss": 0.3112, "step": 1109 }, { "epoch": 2.1305182341650672, "grad_norm": 0.458307147026062, "learning_rate": 2.349982604232851e-06, "loss": 0.2916, "step": 1110 }, { "epoch": 2.132437619961612, "grad_norm": 0.46551185846328735, "learning_rate": 2.340515342858618e-06, "loss": 0.3206, "step": 1111 }, { "epoch": 2.1343570057581576, "grad_norm": 0.411011666059494, "learning_rate": 2.3310613592932467e-06, "loss": 0.3212, "step": 1112 }, { "epoch": 2.1362763915547025, "grad_norm": 0.4040123224258423, "learning_rate": 2.3216207007369247e-06, "loss": 0.3295, "step": 1113 }, { "epoch": 2.1381957773512474, "grad_norm": 0.4294300675392151, "learning_rate": 2.3121934143233223e-06, "loss": 0.3206, "step": 1114 }, { "epoch": 2.140115163147793, "grad_norm": 0.4759252667427063, "learning_rate": 2.3027795471193404e-06, "loss": 0.3217, "step": 1115 }, { "epoch": 2.1420345489443378, "grad_norm": 0.4207714796066284, "learning_rate": 2.293379146124886e-06, "loss": 0.3184, "step": 1116 }, { "epoch": 2.1439539347408827, "grad_norm": 0.4560145437717438, "learning_rate": 2.283992258272634e-06, "loss": 0.2959, "step": 1117 }, { "epoch": 2.145873320537428, "grad_norm": 0.42828992009162903, "learning_rate": 2.274618930427789e-06, "loss": 0.3171, "step": 1118 }, { "epoch": 2.147792706333973, "grad_norm": 0.4540591835975647, "learning_rate": 2.265259209387867e-06, "loss": 0.3473, "step": 1119 }, { "epoch": 2.1497120921305184, "grad_norm": 0.4187568724155426, "learning_rate": 2.255913141882436e-06, "loss": 0.3182, "step": 1120 }, { "epoch": 2.1516314779270633, "grad_norm": 0.430813729763031, "learning_rate": 2.2465807745729057e-06, "loss": 0.3181, "step": 1121 }, { "epoch": 2.1535508637236083, "grad_norm": 0.42998677492141724, "learning_rate": 2.237262154052282e-06, "loss": 0.3076, "step": 1122 }, { "epoch": 2.1554702495201536, "grad_norm": 0.4052675664424896, "learning_rate": 2.2279573268449447e-06, "loss": 0.3346, "step": 1123 }, { "epoch": 2.1573896353166986, "grad_norm": 0.4575018584728241, "learning_rate": 2.2186663394064013e-06, "loss": 0.3135, "step": 1124 }, { "epoch": 2.159309021113244, "grad_norm": 0.4106724262237549, "learning_rate": 2.209389238123066e-06, "loss": 0.3083, "step": 1125 }, { "epoch": 2.161228406909789, "grad_norm": 0.434111088514328, "learning_rate": 2.2001260693120236e-06, "loss": 0.3323, "step": 1126 }, { "epoch": 2.163147792706334, "grad_norm": 0.4358835220336914, "learning_rate": 2.1908768792208e-06, "loss": 0.3093, "step": 1127 }, { "epoch": 2.165067178502879, "grad_norm": 0.4291764199733734, "learning_rate": 2.181641714027131e-06, "loss": 0.3015, "step": 1128 }, { "epoch": 2.166986564299424, "grad_norm": 0.4335598945617676, "learning_rate": 2.172420619838729e-06, "loss": 0.3206, "step": 1129 }, { "epoch": 2.168905950095969, "grad_norm": 0.48128899931907654, "learning_rate": 2.163213642693059e-06, "loss": 0.336, "step": 1130 }, { "epoch": 2.1708253358925145, "grad_norm": 0.4551694691181183, "learning_rate": 2.1540208285570997e-06, "loss": 0.3146, "step": 1131 }, { "epoch": 2.1727447216890594, "grad_norm": 0.46804335713386536, "learning_rate": 2.1448422233271256e-06, "loss": 0.2901, "step": 1132 }, { "epoch": 2.174664107485605, "grad_norm": 0.45482227206230164, "learning_rate": 2.135677872828467e-06, "loss": 0.2859, "step": 1133 }, { "epoch": 2.1765834932821497, "grad_norm": 0.4451681673526764, "learning_rate": 2.1265278228152864e-06, "loss": 0.3, "step": 1134 }, { "epoch": 2.1785028790786947, "grad_norm": 0.4171452820301056, "learning_rate": 2.1173921189703523e-06, "loss": 0.3323, "step": 1135 }, { "epoch": 2.18042226487524, "grad_norm": 0.4301266372203827, "learning_rate": 2.1082708069047993e-06, "loss": 0.3214, "step": 1136 }, { "epoch": 2.182341650671785, "grad_norm": 0.46128469705581665, "learning_rate": 2.0991639321579214e-06, "loss": 0.304, "step": 1137 }, { "epoch": 2.18426103646833, "grad_norm": 0.4355451464653015, "learning_rate": 2.0900715401969248e-06, "loss": 0.3147, "step": 1138 }, { "epoch": 2.1861804222648753, "grad_norm": 0.4441923201084137, "learning_rate": 2.0809936764167106e-06, "loss": 0.3353, "step": 1139 }, { "epoch": 2.1880998080614202, "grad_norm": 0.5264111757278442, "learning_rate": 2.0719303861396435e-06, "loss": 0.3086, "step": 1140 }, { "epoch": 2.1900191938579656, "grad_norm": 0.45619240403175354, "learning_rate": 2.0628817146153353e-06, "loss": 0.3018, "step": 1141 }, { "epoch": 2.1919385796545106, "grad_norm": 0.4669698476791382, "learning_rate": 2.053847707020406e-06, "loss": 0.3152, "step": 1142 }, { "epoch": 2.1938579654510555, "grad_norm": 0.4324015974998474, "learning_rate": 2.0448284084582626e-06, "loss": 0.3172, "step": 1143 }, { "epoch": 2.195777351247601, "grad_norm": 0.44841691851615906, "learning_rate": 2.0358238639588797e-06, "loss": 0.3239, "step": 1144 }, { "epoch": 2.197696737044146, "grad_norm": 0.451253741979599, "learning_rate": 2.0268341184785674e-06, "loss": 0.3182, "step": 1145 }, { "epoch": 2.199616122840691, "grad_norm": 0.43086883425712585, "learning_rate": 2.0178592168997536e-06, "loss": 0.3106, "step": 1146 }, { "epoch": 2.201535508637236, "grad_norm": 0.46180295944213867, "learning_rate": 2.0088992040307532e-06, "loss": 0.3172, "step": 1147 }, { "epoch": 2.203454894433781, "grad_norm": 0.44942930340766907, "learning_rate": 1.999954124605548e-06, "loss": 0.2961, "step": 1148 }, { "epoch": 2.2053742802303264, "grad_norm": 0.41918084025382996, "learning_rate": 1.991024023283562e-06, "loss": 0.3135, "step": 1149 }, { "epoch": 2.2072936660268714, "grad_norm": 0.4825381934642792, "learning_rate": 1.982108944649441e-06, "loss": 0.3096, "step": 1150 }, { "epoch": 2.2092130518234163, "grad_norm": 0.47269153594970703, "learning_rate": 1.9732089332128256e-06, "loss": 0.3038, "step": 1151 }, { "epoch": 2.2111324376199617, "grad_norm": 0.43091702461242676, "learning_rate": 1.9643240334081337e-06, "loss": 0.3146, "step": 1152 }, { "epoch": 2.2130518234165066, "grad_norm": 0.4603826105594635, "learning_rate": 1.955454289594336e-06, "loss": 0.32, "step": 1153 }, { "epoch": 2.214971209213052, "grad_norm": 0.43125930428504944, "learning_rate": 1.946599746054733e-06, "loss": 0.3181, "step": 1154 }, { "epoch": 2.216890595009597, "grad_norm": 0.4349064826965332, "learning_rate": 1.937760446996741e-06, "loss": 0.3141, "step": 1155 }, { "epoch": 2.218809980806142, "grad_norm": 0.4967914819717407, "learning_rate": 1.928936436551661e-06, "loss": 0.3193, "step": 1156 }, { "epoch": 2.2207293666026873, "grad_norm": 0.4555346667766571, "learning_rate": 1.920127758774466e-06, "loss": 0.3016, "step": 1157 }, { "epoch": 2.222648752399232, "grad_norm": 0.4372912645339966, "learning_rate": 1.9113344576435788e-06, "loss": 0.3081, "step": 1158 }, { "epoch": 2.224568138195777, "grad_norm": 0.442127525806427, "learning_rate": 1.902556577060652e-06, "loss": 0.2994, "step": 1159 }, { "epoch": 2.2264875239923225, "grad_norm": 0.45223817229270935, "learning_rate": 1.8937941608503484e-06, "loss": 0.2973, "step": 1160 }, { "epoch": 2.2284069097888675, "grad_norm": 0.44431525468826294, "learning_rate": 1.8850472527601249e-06, "loss": 0.3225, "step": 1161 }, { "epoch": 2.230326295585413, "grad_norm": 0.4687250554561615, "learning_rate": 1.8763158964600109e-06, "loss": 0.3117, "step": 1162 }, { "epoch": 2.232245681381958, "grad_norm": 0.44925227761268616, "learning_rate": 1.8676001355423896e-06, "loss": 0.2964, "step": 1163 }, { "epoch": 2.2341650671785027, "grad_norm": 0.4056849181652069, "learning_rate": 1.8589000135217882e-06, "loss": 0.3267, "step": 1164 }, { "epoch": 2.236084452975048, "grad_norm": 0.458795428276062, "learning_rate": 1.8502155738346488e-06, "loss": 0.2999, "step": 1165 }, { "epoch": 2.238003838771593, "grad_norm": 0.4548143446445465, "learning_rate": 1.8415468598391228e-06, "loss": 0.3496, "step": 1166 }, { "epoch": 2.2399232245681384, "grad_norm": 0.4114418923854828, "learning_rate": 1.8328939148148396e-06, "loss": 0.3151, "step": 1167 }, { "epoch": 2.2418426103646834, "grad_norm": 0.41555583477020264, "learning_rate": 1.8242567819627117e-06, "loss": 0.3134, "step": 1168 }, { "epoch": 2.2437619961612283, "grad_norm": 0.4459441602230072, "learning_rate": 1.8156355044047008e-06, "loss": 0.3092, "step": 1169 }, { "epoch": 2.2456813819577737, "grad_norm": 0.4490182399749756, "learning_rate": 1.8070301251836108e-06, "loss": 0.308, "step": 1170 }, { "epoch": 2.2476007677543186, "grad_norm": 0.4287179708480835, "learning_rate": 1.7984406872628702e-06, "loss": 0.3066, "step": 1171 }, { "epoch": 2.2495201535508635, "grad_norm": 0.42569804191589355, "learning_rate": 1.7898672335263173e-06, "loss": 0.3181, "step": 1172 }, { "epoch": 2.251439539347409, "grad_norm": 0.47702595591545105, "learning_rate": 1.7813098067779949e-06, "loss": 0.2981, "step": 1173 }, { "epoch": 2.253358925143954, "grad_norm": 0.41477781534194946, "learning_rate": 1.7727684497419185e-06, "loss": 0.3465, "step": 1174 }, { "epoch": 2.255278310940499, "grad_norm": 0.43526583909988403, "learning_rate": 1.764243205061879e-06, "loss": 0.2986, "step": 1175 }, { "epoch": 2.257197696737044, "grad_norm": 0.4421873986721039, "learning_rate": 1.755734115301223e-06, "loss": 0.3186, "step": 1176 }, { "epoch": 2.259117082533589, "grad_norm": 0.43704086542129517, "learning_rate": 1.7472412229426456e-06, "loss": 0.3317, "step": 1177 }, { "epoch": 2.2610364683301345, "grad_norm": 0.4429282248020172, "learning_rate": 1.7387645703879697e-06, "loss": 0.3258, "step": 1178 }, { "epoch": 2.2629558541266794, "grad_norm": 0.4295226037502289, "learning_rate": 1.7303041999579395e-06, "loss": 0.3301, "step": 1179 }, { "epoch": 2.2648752399232244, "grad_norm": 0.4342309236526489, "learning_rate": 1.721860153892011e-06, "loss": 0.2981, "step": 1180 }, { "epoch": 2.2667946257197698, "grad_norm": 0.40666764974594116, "learning_rate": 1.7134324743481367e-06, "loss": 0.3256, "step": 1181 }, { "epoch": 2.2687140115163147, "grad_norm": 0.4361766576766968, "learning_rate": 1.7050212034025576e-06, "loss": 0.3092, "step": 1182 }, { "epoch": 2.27063339731286, "grad_norm": 0.4519701600074768, "learning_rate": 1.6966263830495939e-06, "loss": 0.3076, "step": 1183 }, { "epoch": 2.272552783109405, "grad_norm": 0.4143938422203064, "learning_rate": 1.6882480552014324e-06, "loss": 0.3235, "step": 1184 }, { "epoch": 2.27447216890595, "grad_norm": 0.4343293309211731, "learning_rate": 1.6798862616879185e-06, "loss": 0.3252, "step": 1185 }, { "epoch": 2.2763915547024953, "grad_norm": 0.4478144347667694, "learning_rate": 1.6715410442563524e-06, "loss": 0.3214, "step": 1186 }, { "epoch": 2.2783109404990403, "grad_norm": 0.45583292841911316, "learning_rate": 1.6632124445712717e-06, "loss": 0.3162, "step": 1187 }, { "epoch": 2.2802303262955856, "grad_norm": 0.42757654190063477, "learning_rate": 1.654900504214249e-06, "loss": 0.3257, "step": 1188 }, { "epoch": 2.2821497120921306, "grad_norm": 0.4231518805027008, "learning_rate": 1.6466052646836834e-06, "loss": 0.3123, "step": 1189 }, { "epoch": 2.2840690978886755, "grad_norm": 0.42850592732429504, "learning_rate": 1.6383267673945925e-06, "loss": 0.3089, "step": 1190 }, { "epoch": 2.285988483685221, "grad_norm": 0.45356839895248413, "learning_rate": 1.630065053678407e-06, "loss": 0.299, "step": 1191 }, { "epoch": 2.287907869481766, "grad_norm": 0.45450273156166077, "learning_rate": 1.6218201647827626e-06, "loss": 0.307, "step": 1192 }, { "epoch": 2.2898272552783108, "grad_norm": 0.4107741415500641, "learning_rate": 1.6135921418712959e-06, "loss": 0.3103, "step": 1193 }, { "epoch": 2.291746641074856, "grad_norm": 0.4404025971889496, "learning_rate": 1.6053810260234354e-06, "loss": 0.3202, "step": 1194 }, { "epoch": 2.293666026871401, "grad_norm": 0.4031750559806824, "learning_rate": 1.5971868582342047e-06, "loss": 0.3331, "step": 1195 }, { "epoch": 2.295585412667946, "grad_norm": 0.43553730845451355, "learning_rate": 1.5890096794140075e-06, "loss": 0.3259, "step": 1196 }, { "epoch": 2.2975047984644914, "grad_norm": 0.43943485617637634, "learning_rate": 1.5808495303884297e-06, "loss": 0.3342, "step": 1197 }, { "epoch": 2.2994241842610363, "grad_norm": 0.4481421411037445, "learning_rate": 1.5727064518980307e-06, "loss": 0.3295, "step": 1198 }, { "epoch": 2.3013435700575817, "grad_norm": 0.42775166034698486, "learning_rate": 1.5645804845981443e-06, "loss": 0.3087, "step": 1199 }, { "epoch": 2.3032629558541267, "grad_norm": 0.4258095920085907, "learning_rate": 1.55647166905868e-06, "loss": 0.3074, "step": 1200 }, { "epoch": 2.3051823416506716, "grad_norm": 0.46972641348838806, "learning_rate": 1.5483800457639092e-06, "loss": 0.323, "step": 1201 }, { "epoch": 2.307101727447217, "grad_norm": 0.4255313575267792, "learning_rate": 1.5403056551122697e-06, "loss": 0.3304, "step": 1202 }, { "epoch": 2.309021113243762, "grad_norm": 0.43069988489151, "learning_rate": 1.5322485374161627e-06, "loss": 0.3204, "step": 1203 }, { "epoch": 2.3109404990403073, "grad_norm": 0.4178379774093628, "learning_rate": 1.5242087329017585e-06, "loss": 0.3151, "step": 1204 }, { "epoch": 2.3128598848368522, "grad_norm": 0.4551340639591217, "learning_rate": 1.516186281708778e-06, "loss": 0.3171, "step": 1205 }, { "epoch": 2.314779270633397, "grad_norm": 0.4343967139720917, "learning_rate": 1.5081812238903127e-06, "loss": 0.3291, "step": 1206 }, { "epoch": 2.3166986564299425, "grad_norm": 0.43540963530540466, "learning_rate": 1.5001935994126105e-06, "loss": 0.3112, "step": 1207 }, { "epoch": 2.3186180422264875, "grad_norm": 0.43084827065467834, "learning_rate": 1.492223448154882e-06, "loss": 0.3014, "step": 1208 }, { "epoch": 2.320537428023033, "grad_norm": 0.4427920877933502, "learning_rate": 1.4842708099091046e-06, "loss": 0.315, "step": 1209 }, { "epoch": 2.322456813819578, "grad_norm": 0.4350442588329315, "learning_rate": 1.4763357243798154e-06, "loss": 0.3104, "step": 1210 }, { "epoch": 2.3243761996161227, "grad_norm": 0.44006526470184326, "learning_rate": 1.468418231183918e-06, "loss": 0.3141, "step": 1211 }, { "epoch": 2.326295585412668, "grad_norm": 0.43505606055259705, "learning_rate": 1.4605183698504849e-06, "loss": 0.3019, "step": 1212 }, { "epoch": 2.328214971209213, "grad_norm": 0.4478895962238312, "learning_rate": 1.4526361798205597e-06, "loss": 0.3176, "step": 1213 }, { "epoch": 2.330134357005758, "grad_norm": 0.4124133586883545, "learning_rate": 1.4447717004469585e-06, "loss": 0.3322, "step": 1214 }, { "epoch": 2.3320537428023034, "grad_norm": 0.45907062292099, "learning_rate": 1.4369249709940759e-06, "loss": 0.3064, "step": 1215 }, { "epoch": 2.3339731285988483, "grad_norm": 0.4342540204524994, "learning_rate": 1.4290960306376856e-06, "loss": 0.3326, "step": 1216 }, { "epoch": 2.3358925143953932, "grad_norm": 0.4765087962150574, "learning_rate": 1.4212849184647521e-06, "loss": 0.2975, "step": 1217 }, { "epoch": 2.3378119001919386, "grad_norm": 0.4300127327442169, "learning_rate": 1.413491673473225e-06, "loss": 0.2992, "step": 1218 }, { "epoch": 2.3397312859884836, "grad_norm": 0.42785778641700745, "learning_rate": 1.4057163345718532e-06, "loss": 0.2952, "step": 1219 }, { "epoch": 2.341650671785029, "grad_norm": 0.42533954977989197, "learning_rate": 1.3979589405799865e-06, "loss": 0.3066, "step": 1220 }, { "epoch": 2.343570057581574, "grad_norm": 0.4315395951271057, "learning_rate": 1.390219530227378e-06, "loss": 0.3259, "step": 1221 }, { "epoch": 2.345489443378119, "grad_norm": 0.49068766832351685, "learning_rate": 1.382498142154003e-06, "loss": 0.3046, "step": 1222 }, { "epoch": 2.347408829174664, "grad_norm": 0.42176008224487305, "learning_rate": 1.374794814909854e-06, "loss": 0.3232, "step": 1223 }, { "epoch": 2.349328214971209, "grad_norm": 0.41193878650665283, "learning_rate": 1.3671095869547519e-06, "loss": 0.3185, "step": 1224 }, { "epoch": 2.3512476007677545, "grad_norm": 0.44443267583847046, "learning_rate": 1.3594424966581555e-06, "loss": 0.3249, "step": 1225 }, { "epoch": 2.3531669865642995, "grad_norm": 0.41290220618247986, "learning_rate": 1.3517935822989714e-06, "loss": 0.325, "step": 1226 }, { "epoch": 2.3550863723608444, "grad_norm": 0.44867274165153503, "learning_rate": 1.344162882065359e-06, "loss": 0.3011, "step": 1227 }, { "epoch": 2.3570057581573898, "grad_norm": 0.44962751865386963, "learning_rate": 1.3365504340545381e-06, "loss": 0.3217, "step": 1228 }, { "epoch": 2.3589251439539347, "grad_norm": 0.46337977051734924, "learning_rate": 1.328956276272606e-06, "loss": 0.3169, "step": 1229 }, { "epoch": 2.36084452975048, "grad_norm": 0.4197964072227478, "learning_rate": 1.321380446634342e-06, "loss": 0.3437, "step": 1230 }, { "epoch": 2.362763915547025, "grad_norm": 0.43727508187294006, "learning_rate": 1.3138229829630222e-06, "loss": 0.3241, "step": 1231 }, { "epoch": 2.36468330134357, "grad_norm": 0.41488125920295715, "learning_rate": 1.3062839229902264e-06, "loss": 0.3065, "step": 1232 }, { "epoch": 2.3666026871401153, "grad_norm": 0.44243428111076355, "learning_rate": 1.2987633043556507e-06, "loss": 0.3065, "step": 1233 }, { "epoch": 2.3685220729366603, "grad_norm": 0.42278915643692017, "learning_rate": 1.2912611646069224e-06, "loss": 0.3128, "step": 1234 }, { "epoch": 2.370441458733205, "grad_norm": 0.4485195577144623, "learning_rate": 1.2837775411994092e-06, "loss": 0.301, "step": 1235 }, { "epoch": 2.3723608445297506, "grad_norm": 0.5045731067657471, "learning_rate": 1.2763124714960352e-06, "loss": 0.3119, "step": 1236 }, { "epoch": 2.3742802303262955, "grad_norm": 0.47938111424446106, "learning_rate": 1.2688659927670916e-06, "loss": 0.325, "step": 1237 }, { "epoch": 2.3761996161228405, "grad_norm": 0.40125617384910583, "learning_rate": 1.2614381421900524e-06, "loss": 0.3136, "step": 1238 }, { "epoch": 2.378119001919386, "grad_norm": 0.44851693511009216, "learning_rate": 1.2540289568493862e-06, "loss": 0.321, "step": 1239 }, { "epoch": 2.380038387715931, "grad_norm": 0.4331222474575043, "learning_rate": 1.246638473736378e-06, "loss": 0.3251, "step": 1240 }, { "epoch": 2.381957773512476, "grad_norm": 0.4677213132381439, "learning_rate": 1.2392667297489358e-06, "loss": 0.3137, "step": 1241 }, { "epoch": 2.383877159309021, "grad_norm": 0.46194925904273987, "learning_rate": 1.2319137616914096e-06, "loss": 0.2992, "step": 1242 }, { "epoch": 2.385796545105566, "grad_norm": 0.4255446791648865, "learning_rate": 1.2245796062744103e-06, "loss": 0.3044, "step": 1243 }, { "epoch": 2.3877159309021114, "grad_norm": 0.44660255312919617, "learning_rate": 1.217264300114624e-06, "loss": 0.337, "step": 1244 }, { "epoch": 2.3896353166986564, "grad_norm": 0.4729732871055603, "learning_rate": 1.2099678797346282e-06, "loss": 0.3092, "step": 1245 }, { "epoch": 2.3915547024952017, "grad_norm": 0.41241148114204407, "learning_rate": 1.2026903815627122e-06, "loss": 0.3195, "step": 1246 }, { "epoch": 2.3934740882917467, "grad_norm": 0.4574911594390869, "learning_rate": 1.1954318419326938e-06, "loss": 0.3187, "step": 1247 }, { "epoch": 2.3953934740882916, "grad_norm": 0.45129984617233276, "learning_rate": 1.1881922970837352e-06, "loss": 0.2844, "step": 1248 }, { "epoch": 2.397312859884837, "grad_norm": 0.4964337944984436, "learning_rate": 1.1809717831601697e-06, "loss": 0.3268, "step": 1249 }, { "epoch": 2.399232245681382, "grad_norm": 0.4493864178657532, "learning_rate": 1.1737703362113134e-06, "loss": 0.2943, "step": 1250 }, { "epoch": 2.401151631477927, "grad_norm": 0.4391627609729767, "learning_rate": 1.1665879921912887e-06, "loss": 0.3166, "step": 1251 }, { "epoch": 2.4030710172744723, "grad_norm": 0.4752274751663208, "learning_rate": 1.1594247869588398e-06, "loss": 0.3097, "step": 1252 }, { "epoch": 2.404990403071017, "grad_norm": 0.4595050513744354, "learning_rate": 1.1522807562771676e-06, "loss": 0.3103, "step": 1253 }, { "epoch": 2.4069097888675626, "grad_norm": 0.4383956789970398, "learning_rate": 1.1451559358137337e-06, "loss": 0.3158, "step": 1254 }, { "epoch": 2.4088291746641075, "grad_norm": 0.4349156618118286, "learning_rate": 1.1380503611400933e-06, "loss": 0.3148, "step": 1255 }, { "epoch": 2.4107485604606524, "grad_norm": 0.42858800292015076, "learning_rate": 1.1309640677317145e-06, "loss": 0.3238, "step": 1256 }, { "epoch": 2.412667946257198, "grad_norm": 0.45103326439857483, "learning_rate": 1.1238970909677993e-06, "loss": 0.332, "step": 1257 }, { "epoch": 2.4145873320537428, "grad_norm": 0.4650861620903015, "learning_rate": 1.1168494661311153e-06, "loss": 0.2995, "step": 1258 }, { "epoch": 2.4165067178502877, "grad_norm": 0.41937968134880066, "learning_rate": 1.1098212284078037e-06, "loss": 0.328, "step": 1259 }, { "epoch": 2.418426103646833, "grad_norm": 0.43066081404685974, "learning_rate": 1.1028124128872191e-06, "loss": 0.3229, "step": 1260 }, { "epoch": 2.420345489443378, "grad_norm": 0.40924593806266785, "learning_rate": 1.095823054561747e-06, "loss": 0.3192, "step": 1261 }, { "epoch": 2.4222648752399234, "grad_norm": 0.4372531771659851, "learning_rate": 1.0888531883266323e-06, "loss": 0.3073, "step": 1262 }, { "epoch": 2.4241842610364683, "grad_norm": 0.4405689537525177, "learning_rate": 1.0819028489798006e-06, "loss": 0.3274, "step": 1263 }, { "epoch": 2.4261036468330133, "grad_norm": 0.4325174391269684, "learning_rate": 1.0749720712216877e-06, "loss": 0.3156, "step": 1264 }, { "epoch": 2.4280230326295587, "grad_norm": 0.42035821080207825, "learning_rate": 1.068060889655066e-06, "loss": 0.2945, "step": 1265 }, { "epoch": 2.4299424184261036, "grad_norm": 0.4496804177761078, "learning_rate": 1.061169338784872e-06, "loss": 0.307, "step": 1266 }, { "epoch": 2.431861804222649, "grad_norm": 0.45425546169281006, "learning_rate": 1.0542974530180327e-06, "loss": 0.3078, "step": 1267 }, { "epoch": 2.433781190019194, "grad_norm": 0.4608159065246582, "learning_rate": 1.0474452666632946e-06, "loss": 0.3015, "step": 1268 }, { "epoch": 2.435700575815739, "grad_norm": 0.4457089900970459, "learning_rate": 1.0406128139310534e-06, "loss": 0.3363, "step": 1269 }, { "epoch": 2.4376199616122842, "grad_norm": 0.43132296204566956, "learning_rate": 1.033800128933179e-06, "loss": 0.305, "step": 1270 }, { "epoch": 2.439539347408829, "grad_norm": 0.41804131865501404, "learning_rate": 1.027007245682855e-06, "loss": 0.2726, "step": 1271 }, { "epoch": 2.441458733205374, "grad_norm": 0.3982507586479187, "learning_rate": 1.0202341980943965e-06, "loss": 0.3159, "step": 1272 }, { "epoch": 2.4433781190019195, "grad_norm": 0.4465849697589874, "learning_rate": 1.013481019983088e-06, "loss": 0.3003, "step": 1273 }, { "epoch": 2.4452975047984644, "grad_norm": 0.4376426935195923, "learning_rate": 1.0067477450650137e-06, "loss": 0.3224, "step": 1274 }, { "epoch": 2.4472168905950094, "grad_norm": 0.44291427731513977, "learning_rate": 1.0000344069568885e-06, "loss": 0.2994, "step": 1275 }, { "epoch": 2.4491362763915547, "grad_norm": 0.44236990809440613, "learning_rate": 9.933410391758908e-07, "loss": 0.3196, "step": 1276 }, { "epoch": 2.4510556621880997, "grad_norm": 0.3991670310497284, "learning_rate": 9.866676751394927e-07, "loss": 0.3122, "step": 1277 }, { "epoch": 2.452975047984645, "grad_norm": 0.45307478308677673, "learning_rate": 9.80014348165298e-07, "loss": 0.3318, "step": 1278 }, { "epoch": 2.45489443378119, "grad_norm": 0.4078476130962372, "learning_rate": 9.733810914708692e-07, "loss": 0.3211, "step": 1279 }, { "epoch": 2.456813819577735, "grad_norm": 0.4293960928916931, "learning_rate": 9.667679381735706e-07, "loss": 0.3089, "step": 1280 }, { "epoch": 2.4587332053742803, "grad_norm": 0.44673964381217957, "learning_rate": 9.601749212903937e-07, "loss": 0.3109, "step": 1281 }, { "epoch": 2.4606525911708252, "grad_norm": 0.4356265366077423, "learning_rate": 9.536020737377993e-07, "loss": 0.3253, "step": 1282 }, { "epoch": 2.4625719769673706, "grad_norm": 0.4266453683376312, "learning_rate": 9.470494283315451e-07, "loss": 0.3027, "step": 1283 }, { "epoch": 2.4644913627639156, "grad_norm": 0.4322926700115204, "learning_rate": 9.405170177865308e-07, "loss": 0.3, "step": 1284 }, { "epoch": 2.4664107485604605, "grad_norm": 0.418288916349411, "learning_rate": 9.340048747166341e-07, "loss": 0.2821, "step": 1285 }, { "epoch": 2.468330134357006, "grad_norm": 0.44970154762268066, "learning_rate": 9.275130316345393e-07, "loss": 0.3242, "step": 1286 }, { "epoch": 2.470249520153551, "grad_norm": 0.40629079937934875, "learning_rate": 9.210415209515833e-07, "loss": 0.3124, "step": 1287 }, { "epoch": 2.472168905950096, "grad_norm": 0.4125354290008545, "learning_rate": 9.145903749775886e-07, "loss": 0.3006, "step": 1288 }, { "epoch": 2.474088291746641, "grad_norm": 0.46777647733688354, "learning_rate": 9.08159625920711e-07, "loss": 0.3064, "step": 1289 }, { "epoch": 2.476007677543186, "grad_norm": 0.4235178530216217, "learning_rate": 9.017493058872623e-07, "loss": 0.3221, "step": 1290 }, { "epoch": 2.4779270633397315, "grad_norm": 0.435539186000824, "learning_rate": 8.953594468815663e-07, "loss": 0.314, "step": 1291 }, { "epoch": 2.4798464491362764, "grad_norm": 0.43735814094543457, "learning_rate": 8.889900808057911e-07, "loss": 0.3254, "step": 1292 }, { "epoch": 2.4817658349328213, "grad_norm": 0.43097421526908875, "learning_rate": 8.826412394597906e-07, "loss": 0.2961, "step": 1293 }, { "epoch": 2.4836852207293667, "grad_norm": 0.4145753085613251, "learning_rate": 8.763129545409488e-07, "loss": 0.3413, "step": 1294 }, { "epoch": 2.4856046065259116, "grad_norm": 0.42994990944862366, "learning_rate": 8.700052576440166e-07, "loss": 0.3121, "step": 1295 }, { "epoch": 2.4875239923224566, "grad_norm": 0.4394974112510681, "learning_rate": 8.637181802609579e-07, "loss": 0.3125, "step": 1296 }, { "epoch": 2.489443378119002, "grad_norm": 0.42037874460220337, "learning_rate": 8.574517537807897e-07, "loss": 0.3448, "step": 1297 }, { "epoch": 2.491362763915547, "grad_norm": 0.4084146022796631, "learning_rate": 8.512060094894286e-07, "loss": 0.318, "step": 1298 }, { "epoch": 2.4932821497120923, "grad_norm": 0.4284982681274414, "learning_rate": 8.449809785695318e-07, "loss": 0.308, "step": 1299 }, { "epoch": 2.495201535508637, "grad_norm": 0.4659748375415802, "learning_rate": 8.387766921003427e-07, "loss": 0.2885, "step": 1300 }, { "epoch": 2.497120921305182, "grad_norm": 0.4472147524356842, "learning_rate": 8.325931810575344e-07, "loss": 0.3107, "step": 1301 }, { "epoch": 2.4990403071017275, "grad_norm": 0.4051799774169922, "learning_rate": 8.264304763130576e-07, "loss": 0.3234, "step": 1302 }, { "epoch": 2.5009596928982725, "grad_norm": 0.42375609278678894, "learning_rate": 8.202886086349848e-07, "loss": 0.3055, "step": 1303 }, { "epoch": 2.502879078694818, "grad_norm": 0.41215038299560547, "learning_rate": 8.141676086873574e-07, "loss": 0.3212, "step": 1304 }, { "epoch": 2.504798464491363, "grad_norm": 0.4317355453968048, "learning_rate": 8.080675070300303e-07, "loss": 0.3097, "step": 1305 }, { "epoch": 2.5067178502879077, "grad_norm": 0.42706504464149475, "learning_rate": 8.019883341185192e-07, "loss": 0.3095, "step": 1306 }, { "epoch": 2.508637236084453, "grad_norm": 0.46018901467323303, "learning_rate": 7.959301203038566e-07, "loss": 0.2885, "step": 1307 }, { "epoch": 2.510556621880998, "grad_norm": 0.44744229316711426, "learning_rate": 7.898928958324298e-07, "loss": 0.3333, "step": 1308 }, { "epoch": 2.5124760076775434, "grad_norm": 0.408231645822525, "learning_rate": 7.838766908458339e-07, "loss": 0.3326, "step": 1309 }, { "epoch": 2.5143953934740884, "grad_norm": 0.4125899374485016, "learning_rate": 7.77881535380724e-07, "loss": 0.3177, "step": 1310 }, { "epoch": 2.5163147792706333, "grad_norm": 0.47725623846054077, "learning_rate": 7.719074593686593e-07, "loss": 0.2973, "step": 1311 }, { "epoch": 2.5182341650671782, "grad_norm": 0.4335457384586334, "learning_rate": 7.659544926359636e-07, "loss": 0.3345, "step": 1312 }, { "epoch": 2.5201535508637236, "grad_norm": 0.4315381646156311, "learning_rate": 7.600226649035619e-07, "loss": 0.3182, "step": 1313 }, { "epoch": 2.5220729366602685, "grad_norm": 0.44607633352279663, "learning_rate": 7.541120057868456e-07, "loss": 0.3014, "step": 1314 }, { "epoch": 2.523992322456814, "grad_norm": 0.3996421992778778, "learning_rate": 7.482225447955155e-07, "loss": 0.3279, "step": 1315 }, { "epoch": 2.525911708253359, "grad_norm": 0.43521785736083984, "learning_rate": 7.423543113334436e-07, "loss": 0.3192, "step": 1316 }, { "epoch": 2.527831094049904, "grad_norm": 0.4063127934932709, "learning_rate": 7.365073346985158e-07, "loss": 0.3036, "step": 1317 }, { "epoch": 2.529750479846449, "grad_norm": 0.4267193377017975, "learning_rate": 7.306816440824915e-07, "loss": 0.317, "step": 1318 }, { "epoch": 2.531669865642994, "grad_norm": 0.4290691912174225, "learning_rate": 7.248772685708589e-07, "loss": 0.3029, "step": 1319 }, { "epoch": 2.5335892514395395, "grad_norm": 0.4309139549732208, "learning_rate": 7.190942371426862e-07, "loss": 0.2868, "step": 1320 }, { "epoch": 2.5355086372360844, "grad_norm": 0.41153669357299805, "learning_rate": 7.133325786704792e-07, "loss": 0.308, "step": 1321 }, { "epoch": 2.5374280230326294, "grad_norm": 0.4044669568538666, "learning_rate": 7.075923219200359e-07, "loss": 0.3204, "step": 1322 }, { "epoch": 2.5393474088291748, "grad_norm": 0.41248294711112976, "learning_rate": 7.018734955503048e-07, "loss": 0.3262, "step": 1323 }, { "epoch": 2.5412667946257197, "grad_norm": 0.4335043728351593, "learning_rate": 6.961761281132385e-07, "loss": 0.3177, "step": 1324 }, { "epoch": 2.543186180422265, "grad_norm": 0.4295669198036194, "learning_rate": 6.905002480536565e-07, "loss": 0.324, "step": 1325 }, { "epoch": 2.54510556621881, "grad_norm": 0.43261173367500305, "learning_rate": 6.848458837090971e-07, "loss": 0.2961, "step": 1326 }, { "epoch": 2.547024952015355, "grad_norm": 0.4404785931110382, "learning_rate": 6.7921306330968e-07, "loss": 0.3066, "step": 1327 }, { "epoch": 2.5489443378119003, "grad_norm": 0.40826162695884705, "learning_rate": 6.736018149779628e-07, "loss": 0.3064, "step": 1328 }, { "epoch": 2.5508637236084453, "grad_norm": 0.40300145745277405, "learning_rate": 6.680121667288026e-07, "loss": 0.3067, "step": 1329 }, { "epoch": 2.5527831094049906, "grad_norm": 0.4415788948535919, "learning_rate": 6.624441464692161e-07, "loss": 0.3228, "step": 1330 }, { "epoch": 2.5547024952015356, "grad_norm": 0.4196246266365051, "learning_rate": 6.568977819982386e-07, "loss": 0.2786, "step": 1331 }, { "epoch": 2.5566218809980805, "grad_norm": 0.41160982847213745, "learning_rate": 6.513731010067869e-07, "loss": 0.3307, "step": 1332 }, { "epoch": 2.5585412667946255, "grad_norm": 0.4087945818901062, "learning_rate": 6.458701310775184e-07, "loss": 0.3103, "step": 1333 }, { "epoch": 2.560460652591171, "grad_norm": 0.39813998341560364, "learning_rate": 6.403888996846991e-07, "loss": 0.3258, "step": 1334 }, { "epoch": 2.5623800383877158, "grad_norm": 0.42304396629333496, "learning_rate": 6.349294341940593e-07, "loss": 0.3162, "step": 1335 }, { "epoch": 2.564299424184261, "grad_norm": 0.41241905093193054, "learning_rate": 6.294917618626622e-07, "loss": 0.3018, "step": 1336 }, { "epoch": 2.566218809980806, "grad_norm": 0.45237982273101807, "learning_rate": 6.240759098387628e-07, "loss": 0.3075, "step": 1337 }, { "epoch": 2.568138195777351, "grad_norm": 0.4210013151168823, "learning_rate": 6.1868190516168e-07, "loss": 0.3177, "step": 1338 }, { "epoch": 2.5700575815738964, "grad_norm": 0.41607722640037537, "learning_rate": 6.133097747616546e-07, "loss": 0.3242, "step": 1339 }, { "epoch": 2.5719769673704413, "grad_norm": 0.4222376346588135, "learning_rate": 6.07959545459717e-07, "loss": 0.311, "step": 1340 }, { "epoch": 2.5738963531669867, "grad_norm": 0.42890986800193787, "learning_rate": 6.026312439675553e-07, "loss": 0.3143, "step": 1341 }, { "epoch": 2.5758157389635317, "grad_norm": 0.4139798581600189, "learning_rate": 5.973248968873774e-07, "loss": 0.3167, "step": 1342 }, { "epoch": 2.5777351247600766, "grad_norm": 0.40550974011421204, "learning_rate": 5.92040530711786e-07, "loss": 0.3306, "step": 1343 }, { "epoch": 2.579654510556622, "grad_norm": 0.4133010804653168, "learning_rate": 5.867781718236359e-07, "loss": 0.314, "step": 1344 }, { "epoch": 2.581573896353167, "grad_norm": 0.478579044342041, "learning_rate": 5.815378464959109e-07, "loss": 0.31, "step": 1345 }, { "epoch": 2.5834932821497123, "grad_norm": 0.40308183431625366, "learning_rate": 5.763195808915873e-07, "loss": 0.3118, "step": 1346 }, { "epoch": 2.5854126679462572, "grad_norm": 0.42606687545776367, "learning_rate": 5.711234010635103e-07, "loss": 0.2861, "step": 1347 }, { "epoch": 2.587332053742802, "grad_norm": 0.42255333065986633, "learning_rate": 5.659493329542531e-07, "loss": 0.308, "step": 1348 }, { "epoch": 2.5892514395393476, "grad_norm": 0.4213169515132904, "learning_rate": 5.607974023959977e-07, "loss": 0.3058, "step": 1349 }, { "epoch": 2.5911708253358925, "grad_norm": 0.42563602328300476, "learning_rate": 5.55667635110399e-07, "loss": 0.3067, "step": 1350 }, { "epoch": 2.593090211132438, "grad_norm": 0.4288907051086426, "learning_rate": 5.505600567084602e-07, "loss": 0.3492, "step": 1351 }, { "epoch": 2.595009596928983, "grad_norm": 0.404648095369339, "learning_rate": 5.454746926904031e-07, "loss": 0.3169, "step": 1352 }, { "epoch": 2.5969289827255277, "grad_norm": 0.4173365831375122, "learning_rate": 5.40411568445543e-07, "loss": 0.3247, "step": 1353 }, { "epoch": 2.5988483685220727, "grad_norm": 0.4107537865638733, "learning_rate": 5.353707092521581e-07, "loss": 0.328, "step": 1354 }, { "epoch": 2.600767754318618, "grad_norm": 0.44188162684440613, "learning_rate": 5.303521402773665e-07, "loss": 0.3021, "step": 1355 }, { "epoch": 2.602687140115163, "grad_norm": 0.4295837879180908, "learning_rate": 5.253558865770009e-07, "loss": 0.2932, "step": 1356 }, { "epoch": 2.6046065259117084, "grad_norm": 0.41246965527534485, "learning_rate": 5.203819730954807e-07, "loss": 0.3242, "step": 1357 }, { "epoch": 2.6065259117082533, "grad_norm": 0.41591379046440125, "learning_rate": 5.154304246656888e-07, "loss": 0.3076, "step": 1358 }, { "epoch": 2.6084452975047983, "grad_norm": 0.4368695616722107, "learning_rate": 5.105012660088493e-07, "loss": 0.2775, "step": 1359 }, { "epoch": 2.6103646833013436, "grad_norm": 0.4013568162918091, "learning_rate": 5.055945217344004e-07, "loss": 0.3098, "step": 1360 }, { "epoch": 2.6122840690978886, "grad_norm": 0.42002663016319275, "learning_rate": 5.007102163398758e-07, "loss": 0.3069, "step": 1361 }, { "epoch": 2.614203454894434, "grad_norm": 0.42783382534980774, "learning_rate": 4.958483742107783e-07, "loss": 0.2855, "step": 1362 }, { "epoch": 2.616122840690979, "grad_norm": 0.41534850001335144, "learning_rate": 4.910090196204626e-07, "loss": 0.3286, "step": 1363 }, { "epoch": 2.618042226487524, "grad_norm": 0.42190253734588623, "learning_rate": 4.861921767300081e-07, "loss": 0.3147, "step": 1364 }, { "epoch": 2.619961612284069, "grad_norm": 0.44334304332733154, "learning_rate": 4.81397869588106e-07, "loss": 0.3286, "step": 1365 }, { "epoch": 2.621880998080614, "grad_norm": 0.4278927147388458, "learning_rate": 4.766261221309321e-07, "loss": 0.3076, "step": 1366 }, { "epoch": 2.6238003838771595, "grad_norm": 0.3953321576118469, "learning_rate": 4.718769581820309e-07, "loss": 0.3328, "step": 1367 }, { "epoch": 2.6257197696737045, "grad_norm": 0.4349899888038635, "learning_rate": 4.671504014521938e-07, "loss": 0.319, "step": 1368 }, { "epoch": 2.6276391554702494, "grad_norm": 0.43109330534935, "learning_rate": 4.6244647553934594e-07, "loss": 0.3111, "step": 1369 }, { "epoch": 2.629558541266795, "grad_norm": 0.4177679717540741, "learning_rate": 4.5776520392842473e-07, "loss": 0.3187, "step": 1370 }, { "epoch": 2.6314779270633397, "grad_norm": 0.43818461894989014, "learning_rate": 4.531066099912623e-07, "loss": 0.3059, "step": 1371 }, { "epoch": 2.633397312859885, "grad_norm": 0.43758687376976013, "learning_rate": 4.484707169864699e-07, "loss": 0.2914, "step": 1372 }, { "epoch": 2.63531669865643, "grad_norm": 0.4283095896244049, "learning_rate": 4.43857548059321e-07, "loss": 0.2987, "step": 1373 }, { "epoch": 2.637236084452975, "grad_norm": 0.41705238819122314, "learning_rate": 4.392671262416387e-07, "loss": 0.3008, "step": 1374 }, { "epoch": 2.63915547024952, "grad_norm": 0.4377995431423187, "learning_rate": 4.346994744516747e-07, "loss": 0.3355, "step": 1375 }, { "epoch": 2.6410748560460653, "grad_norm": 0.4349444508552551, "learning_rate": 4.301546154940006e-07, "loss": 0.3322, "step": 1376 }, { "epoch": 2.6429942418426102, "grad_norm": 0.4062652289867401, "learning_rate": 4.2563257205939124e-07, "loss": 0.2917, "step": 1377 }, { "epoch": 2.6449136276391556, "grad_norm": 0.41739413142204285, "learning_rate": 4.211333667247125e-07, "loss": 0.3169, "step": 1378 }, { "epoch": 2.6468330134357005, "grad_norm": 0.42524296045303345, "learning_rate": 4.1665702195280986e-07, "loss": 0.3234, "step": 1379 }, { "epoch": 2.6487523992322455, "grad_norm": 0.41360804438591003, "learning_rate": 4.122035600923913e-07, "loss": 0.3068, "step": 1380 }, { "epoch": 2.650671785028791, "grad_norm": 0.4248666763305664, "learning_rate": 4.077730033779215e-07, "loss": 0.3296, "step": 1381 }, { "epoch": 2.652591170825336, "grad_norm": 0.4026371240615845, "learning_rate": 4.0336537392950626e-07, "loss": 0.327, "step": 1382 }, { "epoch": 2.654510556621881, "grad_norm": 0.42271992564201355, "learning_rate": 3.989806937527868e-07, "loss": 0.3021, "step": 1383 }, { "epoch": 2.656429942418426, "grad_norm": 0.42590317130088806, "learning_rate": 3.9461898473882485e-07, "loss": 0.3164, "step": 1384 }, { "epoch": 2.658349328214971, "grad_norm": 0.4365948736667633, "learning_rate": 3.902802686639967e-07, "loss": 0.285, "step": 1385 }, { "epoch": 2.6602687140115164, "grad_norm": 0.42524266242980957, "learning_rate": 3.859645671898843e-07, "loss": 0.3146, "step": 1386 }, { "epoch": 2.6621880998080614, "grad_norm": 0.4387440085411072, "learning_rate": 3.816719018631637e-07, "loss": 0.3207, "step": 1387 }, { "epoch": 2.6641074856046068, "grad_norm": 0.4367164671421051, "learning_rate": 3.774022941155042e-07, "loss": 0.3226, "step": 1388 }, { "epoch": 2.6660268714011517, "grad_norm": 0.4064806401729584, "learning_rate": 3.7315576526345433e-07, "loss": 0.3039, "step": 1389 }, { "epoch": 2.6679462571976966, "grad_norm": 0.4218124747276306, "learning_rate": 3.6893233650833916e-07, "loss": 0.2979, "step": 1390 }, { "epoch": 2.669865642994242, "grad_norm": 0.40375977754592896, "learning_rate": 3.647320289361517e-07, "loss": 0.3235, "step": 1391 }, { "epoch": 2.671785028790787, "grad_norm": 0.4095809757709503, "learning_rate": 3.6055486351745327e-07, "loss": 0.3286, "step": 1392 }, { "epoch": 2.6737044145873323, "grad_norm": 0.4179653227329254, "learning_rate": 3.5640086110726337e-07, "loss": 0.3232, "step": 1393 }, { "epoch": 2.6756238003838773, "grad_norm": 0.43076270818710327, "learning_rate": 3.5227004244495653e-07, "loss": 0.3156, "step": 1394 }, { "epoch": 2.677543186180422, "grad_norm": 0.44649484753608704, "learning_rate": 3.4816242815416014e-07, "loss": 0.2866, "step": 1395 }, { "epoch": 2.679462571976967, "grad_norm": 0.40290388464927673, "learning_rate": 3.4407803874264955e-07, "loss": 0.2984, "step": 1396 }, { "epoch": 2.6813819577735125, "grad_norm": 0.42930224537849426, "learning_rate": 3.4001689460225197e-07, "loss": 0.3088, "step": 1397 }, { "epoch": 2.6833013435700575, "grad_norm": 0.4360639154911041, "learning_rate": 3.3597901600873286e-07, "loss": 0.33, "step": 1398 }, { "epoch": 2.685220729366603, "grad_norm": 0.44387003779411316, "learning_rate": 3.3196442312170563e-07, "loss": 0.3298, "step": 1399 }, { "epoch": 2.6871401151631478, "grad_norm": 0.4076465368270874, "learning_rate": 3.2797313598452506e-07, "loss": 0.332, "step": 1400 }, { "epoch": 2.6890595009596927, "grad_norm": 0.4197803735733032, "learning_rate": 3.2400517452419176e-07, "loss": 0.3057, "step": 1401 }, { "epoch": 2.690978886756238, "grad_norm": 0.4352559447288513, "learning_rate": 3.2006055855124786e-07, "loss": 0.3137, "step": 1402 }, { "epoch": 2.692898272552783, "grad_norm": 0.4012155830860138, "learning_rate": 3.161393077596797e-07, "loss": 0.3095, "step": 1403 }, { "epoch": 2.6948176583493284, "grad_norm": 0.40506085753440857, "learning_rate": 3.122414417268216e-07, "loss": 0.3066, "step": 1404 }, { "epoch": 2.6967370441458733, "grad_norm": 0.45125508308410645, "learning_rate": 3.0836697991325547e-07, "loss": 0.292, "step": 1405 }, { "epoch": 2.6986564299424183, "grad_norm": 0.4334825873374939, "learning_rate": 3.045159416627158e-07, "loss": 0.3002, "step": 1406 }, { "epoch": 2.7005758157389637, "grad_norm": 0.4204193949699402, "learning_rate": 3.0068834620199106e-07, "loss": 0.3113, "step": 1407 }, { "epoch": 2.7024952015355086, "grad_norm": 0.4257374107837677, "learning_rate": 2.968842126408289e-07, "loss": 0.312, "step": 1408 }, { "epoch": 2.704414587332054, "grad_norm": 0.422198086977005, "learning_rate": 2.931035599718396e-07, "loss": 0.3024, "step": 1409 }, { "epoch": 2.706333973128599, "grad_norm": 0.4264659285545349, "learning_rate": 2.893464070704055e-07, "loss": 0.3016, "step": 1410 }, { "epoch": 2.708253358925144, "grad_norm": 0.4058426320552826, "learning_rate": 2.85612772694579e-07, "loss": 0.3122, "step": 1411 }, { "epoch": 2.710172744721689, "grad_norm": 0.39454710483551025, "learning_rate": 2.8190267548499684e-07, "loss": 0.3243, "step": 1412 }, { "epoch": 2.712092130518234, "grad_norm": 0.39569634199142456, "learning_rate": 2.7821613396478097e-07, "loss": 0.332, "step": 1413 }, { "epoch": 2.714011516314779, "grad_norm": 0.4027731418609619, "learning_rate": 2.7455316653945075e-07, "loss": 0.2961, "step": 1414 }, { "epoch": 2.7159309021113245, "grad_norm": 0.4653429388999939, "learning_rate": 2.7091379149682683e-07, "loss": 0.3048, "step": 1415 }, { "epoch": 2.7178502879078694, "grad_norm": 0.4057263135910034, "learning_rate": 2.672980270069436e-07, "loss": 0.3123, "step": 1416 }, { "epoch": 2.7197696737044144, "grad_norm": 0.3960817754268646, "learning_rate": 2.63705891121957e-07, "loss": 0.3038, "step": 1417 }, { "epoch": 2.7216890595009597, "grad_norm": 0.4117777645587921, "learning_rate": 2.6013740177605105e-07, "loss": 0.3156, "step": 1418 }, { "epoch": 2.7236084452975047, "grad_norm": 0.45482003688812256, "learning_rate": 2.5659257678535664e-07, "loss": 0.2909, "step": 1419 }, { "epoch": 2.72552783109405, "grad_norm": 0.43561840057373047, "learning_rate": 2.53071433847854e-07, "loss": 0.3261, "step": 1420 }, { "epoch": 2.727447216890595, "grad_norm": 0.41022825241088867, "learning_rate": 2.4957399054328815e-07, "loss": 0.3094, "step": 1421 }, { "epoch": 2.72936660268714, "grad_norm": 0.40966078639030457, "learning_rate": 2.461002643330795e-07, "loss": 0.3172, "step": 1422 }, { "epoch": 2.7312859884836853, "grad_norm": 0.425167441368103, "learning_rate": 2.42650272560242e-07, "loss": 0.3121, "step": 1423 }, { "epoch": 2.7332053742802302, "grad_norm": 0.43444162607192993, "learning_rate": 2.3922403244928836e-07, "loss": 0.3111, "step": 1424 }, { "epoch": 2.7351247600767756, "grad_norm": 0.39095646142959595, "learning_rate": 2.3582156110614985e-07, "loss": 0.2984, "step": 1425 }, { "epoch": 2.7370441458733206, "grad_norm": 0.4049433171749115, "learning_rate": 2.3244287551808964e-07, "loss": 0.2976, "step": 1426 }, { "epoch": 2.7389635316698655, "grad_norm": 0.4083241820335388, "learning_rate": 2.2908799255361546e-07, "loss": 0.3155, "step": 1427 }, { "epoch": 2.740882917466411, "grad_norm": 0.40490564703941345, "learning_rate": 2.2575692896240175e-07, "loss": 0.3227, "step": 1428 }, { "epoch": 2.742802303262956, "grad_norm": 0.41849255561828613, "learning_rate": 2.2244970137519585e-07, "loss": 0.3152, "step": 1429 }, { "epoch": 2.744721689059501, "grad_norm": 0.41007882356643677, "learning_rate": 2.1916632630374579e-07, "loss": 0.3239, "step": 1430 }, { "epoch": 2.746641074856046, "grad_norm": 0.3919316232204437, "learning_rate": 2.1590682014070997e-07, "loss": 0.3023, "step": 1431 }, { "epoch": 2.748560460652591, "grad_norm": 0.41691434383392334, "learning_rate": 2.126711991595809e-07, "loss": 0.3183, "step": 1432 }, { "epoch": 2.750479846449136, "grad_norm": 0.43507540225982666, "learning_rate": 2.0945947951459876e-07, "loss": 0.3299, "step": 1433 }, { "epoch": 2.7523992322456814, "grad_norm": 0.4089444875717163, "learning_rate": 2.062716772406753e-07, "loss": 0.3276, "step": 1434 }, { "epoch": 2.7543186180422263, "grad_norm": 0.41633155941963196, "learning_rate": 2.0310780825331056e-07, "loss": 0.3113, "step": 1435 }, { "epoch": 2.7562380038387717, "grad_norm": 0.44201090931892395, "learning_rate": 1.999678883485151e-07, "loss": 0.3119, "step": 1436 }, { "epoch": 2.7581573896353166, "grad_norm": 0.41536107659339905, "learning_rate": 1.968519332027302e-07, "loss": 0.3352, "step": 1437 }, { "epoch": 2.7600767754318616, "grad_norm": 0.40837523341178894, "learning_rate": 1.9375995837275174e-07, "loss": 0.3229, "step": 1438 }, { "epoch": 2.761996161228407, "grad_norm": 0.40294498205184937, "learning_rate": 1.9069197929564854e-07, "loss": 0.3351, "step": 1439 }, { "epoch": 2.763915547024952, "grad_norm": 0.4227578043937683, "learning_rate": 1.876480112886886e-07, "loss": 0.3161, "step": 1440 }, { "epoch": 2.7658349328214973, "grad_norm": 0.4386253356933594, "learning_rate": 1.8462806954926306e-07, "loss": 0.3372, "step": 1441 }, { "epoch": 2.767754318618042, "grad_norm": 0.4135630130767822, "learning_rate": 1.8163216915480787e-07, "loss": 0.3169, "step": 1442 }, { "epoch": 2.769673704414587, "grad_norm": 0.4121257960796356, "learning_rate": 1.786603250627278e-07, "loss": 0.3013, "step": 1443 }, { "epoch": 2.7715930902111325, "grad_norm": 0.45425140857696533, "learning_rate": 1.7571255211032702e-07, "loss": 0.3123, "step": 1444 }, { "epoch": 2.7735124760076775, "grad_norm": 0.4257670044898987, "learning_rate": 1.7278886501472804e-07, "loss": 0.2889, "step": 1445 }, { "epoch": 2.775431861804223, "grad_norm": 0.44320055842399597, "learning_rate": 1.69889278372804e-07, "loss": 0.3096, "step": 1446 }, { "epoch": 2.777351247600768, "grad_norm": 0.4233901798725128, "learning_rate": 1.6701380666110323e-07, "loss": 0.3175, "step": 1447 }, { "epoch": 2.7792706333973127, "grad_norm": 0.4123902916908264, "learning_rate": 1.641624642357764e-07, "loss": 0.3097, "step": 1448 }, { "epoch": 2.781190019193858, "grad_norm": 0.42838895320892334, "learning_rate": 1.6133526533250566e-07, "loss": 0.3064, "step": 1449 }, { "epoch": 2.783109404990403, "grad_norm": 0.42039698362350464, "learning_rate": 1.5853222406643555e-07, "loss": 0.3189, "step": 1450 }, { "epoch": 2.7850287907869484, "grad_norm": 0.39586174488067627, "learning_rate": 1.5575335443209882e-07, "loss": 0.3231, "step": 1451 }, { "epoch": 2.7869481765834934, "grad_norm": 0.4079888164997101, "learning_rate": 1.5299867030334815e-07, "loss": 0.3237, "step": 1452 }, { "epoch": 2.7888675623800383, "grad_norm": 0.405671626329422, "learning_rate": 1.5026818543328826e-07, "loss": 0.3236, "step": 1453 }, { "epoch": 2.7907869481765832, "grad_norm": 0.4239642322063446, "learning_rate": 1.475619134542039e-07, "loss": 0.3324, "step": 1454 }, { "epoch": 2.7927063339731286, "grad_norm": 0.40723446011543274, "learning_rate": 1.4487986787749763e-07, "loss": 0.3207, "step": 1455 }, { "epoch": 2.7946257197696736, "grad_norm": 0.40914076566696167, "learning_rate": 1.4222206209361543e-07, "loss": 0.2896, "step": 1456 }, { "epoch": 2.796545105566219, "grad_norm": 0.4162328839302063, "learning_rate": 1.3958850937198454e-07, "loss": 0.2963, "step": 1457 }, { "epoch": 2.798464491362764, "grad_norm": 0.4240896999835968, "learning_rate": 1.3697922286094457e-07, "loss": 0.3252, "step": 1458 }, { "epoch": 2.800383877159309, "grad_norm": 0.4126145541667938, "learning_rate": 1.3439421558768484e-07, "loss": 0.3115, "step": 1459 }, { "epoch": 2.802303262955854, "grad_norm": 0.4217420220375061, "learning_rate": 1.318335004581761e-07, "loss": 0.3283, "step": 1460 }, { "epoch": 2.804222648752399, "grad_norm": 0.40719228982925415, "learning_rate": 1.292970902571078e-07, "loss": 0.3214, "step": 1461 }, { "epoch": 2.8061420345489445, "grad_norm": 0.4078000783920288, "learning_rate": 1.2678499764782525e-07, "loss": 0.2898, "step": 1462 }, { "epoch": 2.8080614203454894, "grad_norm": 0.4136858880519867, "learning_rate": 1.2429723517226212e-07, "loss": 0.323, "step": 1463 }, { "epoch": 2.8099808061420344, "grad_norm": 0.43361636996269226, "learning_rate": 1.2183381525088522e-07, "loss": 0.308, "step": 1464 }, { "epoch": 2.8119001919385798, "grad_norm": 0.4293272793292999, "learning_rate": 1.1939475018262481e-07, "loss": 0.3087, "step": 1465 }, { "epoch": 2.8138195777351247, "grad_norm": 0.4383438229560852, "learning_rate": 1.1698005214481666e-07, "loss": 0.3183, "step": 1466 }, { "epoch": 2.81573896353167, "grad_norm": 0.38439667224884033, "learning_rate": 1.1458973319314337e-07, "loss": 0.3417, "step": 1467 }, { "epoch": 2.817658349328215, "grad_norm": 0.4329484701156616, "learning_rate": 1.1222380526156929e-07, "loss": 0.3059, "step": 1468 }, { "epoch": 2.81957773512476, "grad_norm": 0.43331149220466614, "learning_rate": 1.0988228016228508e-07, "loss": 0.3383, "step": 1469 }, { "epoch": 2.8214971209213053, "grad_norm": 0.4108934998512268, "learning_rate": 1.0756516958564667e-07, "loss": 0.3543, "step": 1470 }, { "epoch": 2.8234165067178503, "grad_norm": 0.4152641296386719, "learning_rate": 1.05272485100118e-07, "loss": 0.3019, "step": 1471 }, { "epoch": 2.8253358925143957, "grad_norm": 0.397703617811203, "learning_rate": 1.0300423815221172e-07, "loss": 0.3057, "step": 1472 }, { "epoch": 2.8272552783109406, "grad_norm": 0.42640984058380127, "learning_rate": 1.007604400664347e-07, "loss": 0.3172, "step": 1473 }, { "epoch": 2.8291746641074855, "grad_norm": 0.4061037302017212, "learning_rate": 9.85411020452276e-08, "loss": 0.3065, "step": 1474 }, { "epoch": 2.8310940499040305, "grad_norm": 0.4140390455722809, "learning_rate": 9.634623516891372e-08, "loss": 0.3085, "step": 1475 }, { "epoch": 2.833013435700576, "grad_norm": 0.3956490457057953, "learning_rate": 9.417585039563748e-08, "loss": 0.2897, "step": 1476 }, { "epoch": 2.834932821497121, "grad_norm": 0.4228086471557617, "learning_rate": 9.202995856131769e-08, "loss": 0.2931, "step": 1477 }, { "epoch": 2.836852207293666, "grad_norm": 0.4182116389274597, "learning_rate": 8.9908570379586e-08, "loss": 0.3201, "step": 1478 }, { "epoch": 2.838771593090211, "grad_norm": 0.43323320150375366, "learning_rate": 8.781169644173748e-08, "loss": 0.3055, "step": 1479 }, { "epoch": 2.840690978886756, "grad_norm": 0.42262229323387146, "learning_rate": 8.573934721667731e-08, "loss": 0.2982, "step": 1480 }, { "epoch": 2.8426103646833014, "grad_norm": 0.43987467885017395, "learning_rate": 8.369153305086641e-08, "loss": 0.3248, "step": 1481 }, { "epoch": 2.8445297504798464, "grad_norm": 0.40416577458381653, "learning_rate": 8.166826416827423e-08, "loss": 0.3162, "step": 1482 }, { "epoch": 2.8464491362763917, "grad_norm": 0.4216953217983246, "learning_rate": 7.966955067032101e-08, "loss": 0.3432, "step": 1483 }, { "epoch": 2.8483685220729367, "grad_norm": 0.41451066732406616, "learning_rate": 7.769540253583452e-08, "loss": 0.2923, "step": 1484 }, { "epoch": 2.8502879078694816, "grad_norm": 0.4410330057144165, "learning_rate": 7.574582962099508e-08, "loss": 0.3157, "step": 1485 }, { "epoch": 2.852207293666027, "grad_norm": 0.42475655674934387, "learning_rate": 7.382084165928837e-08, "loss": 0.2846, "step": 1486 }, { "epoch": 2.854126679462572, "grad_norm": 0.4353221654891968, "learning_rate": 7.192044826145772e-08, "loss": 0.3203, "step": 1487 }, { "epoch": 2.8560460652591173, "grad_norm": 0.3987311124801636, "learning_rate": 7.004465891545354e-08, "loss": 0.3209, "step": 1488 }, { "epoch": 2.8579654510556622, "grad_norm": 0.4058341085910797, "learning_rate": 6.819348298638839e-08, "loss": 0.3171, "step": 1489 }, { "epoch": 2.859884836852207, "grad_norm": 0.47102582454681396, "learning_rate": 6.636692971648873e-08, "loss": 0.2662, "step": 1490 }, { "epoch": 2.8618042226487526, "grad_norm": 0.39733022451400757, "learning_rate": 6.45650082250493e-08, "loss": 0.3188, "step": 1491 }, { "epoch": 2.8637236084452975, "grad_norm": 0.43527570366859436, "learning_rate": 6.27877275083888e-08, "loss": 0.3086, "step": 1492 }, { "epoch": 2.865642994241843, "grad_norm": 0.40278005599975586, "learning_rate": 6.10350964398021e-08, "loss": 0.3371, "step": 1493 }, { "epoch": 2.867562380038388, "grad_norm": 0.41307875514030457, "learning_rate": 5.930712376951864e-08, "loss": 0.2961, "step": 1494 }, { "epoch": 2.8694817658349328, "grad_norm": 0.4294099509716034, "learning_rate": 5.7603818124657984e-08, "loss": 0.3166, "step": 1495 }, { "epoch": 2.8714011516314777, "grad_norm": 0.41227903962135315, "learning_rate": 5.5925188009184894e-08, "loss": 0.2889, "step": 1496 }, { "epoch": 2.873320537428023, "grad_norm": 0.4012930691242218, "learning_rate": 5.4271241803871e-08, "loss": 0.351, "step": 1497 }, { "epoch": 2.875239923224568, "grad_norm": 0.4242798984050751, "learning_rate": 5.264198776624818e-08, "loss": 0.3141, "step": 1498 }, { "epoch": 2.8771593090211134, "grad_norm": 0.41536352038383484, "learning_rate": 5.103743403057027e-08, "loss": 0.2987, "step": 1499 }, { "epoch": 2.8790786948176583, "grad_norm": 0.4117323160171509, "learning_rate": 4.9457588607772497e-08, "loss": 0.3115, "step": 1500 }, { "epoch": 2.8809980806142033, "grad_norm": 0.4231358468532562, "learning_rate": 4.7902459385429364e-08, "loss": 0.3368, "step": 1501 }, { "epoch": 2.8829174664107486, "grad_norm": 0.4191913604736328, "learning_rate": 4.6372054127718504e-08, "loss": 0.3299, "step": 1502 }, { "epoch": 2.8848368522072936, "grad_norm": 0.3866724669933319, "learning_rate": 4.486638047537795e-08, "loss": 0.329, "step": 1503 }, { "epoch": 2.886756238003839, "grad_norm": 0.41261211037635803, "learning_rate": 4.338544594567229e-08, "loss": 0.3121, "step": 1504 }, { "epoch": 2.888675623800384, "grad_norm": 0.3883817791938782, "learning_rate": 4.192925793235159e-08, "loss": 0.3085, "step": 1505 }, { "epoch": 2.890595009596929, "grad_norm": 0.39369550347328186, "learning_rate": 4.0497823705615836e-08, "loss": 0.3089, "step": 1506 }, { "epoch": 2.892514395393474, "grad_norm": 0.425434410572052, "learning_rate": 3.909115041207889e-08, "loss": 0.3327, "step": 1507 }, { "epoch": 2.894433781190019, "grad_norm": 0.42306071519851685, "learning_rate": 3.770924507473239e-08, "loss": 0.3079, "step": 1508 }, { "epoch": 2.8963531669865645, "grad_norm": 0.420290470123291, "learning_rate": 3.635211459291188e-08, "loss": 0.2919, "step": 1509 }, { "epoch": 2.8982725527831095, "grad_norm": 0.401720255613327, "learning_rate": 3.501976574226018e-08, "loss": 0.3316, "step": 1510 }, { "epoch": 2.9001919385796544, "grad_norm": 0.41193652153015137, "learning_rate": 3.37122051746952e-08, "loss": 0.3049, "step": 1511 }, { "epoch": 2.9021113243761993, "grad_norm": 0.4346391558647156, "learning_rate": 3.2429439418376065e-08, "loss": 0.3313, "step": 1512 }, { "epoch": 2.9040307101727447, "grad_norm": 0.4063650071620941, "learning_rate": 3.117147487767092e-08, "loss": 0.3031, "step": 1513 }, { "epoch": 2.90595009596929, "grad_norm": 0.42783722281455994, "learning_rate": 2.9938317833124175e-08, "loss": 0.3284, "step": 1514 }, { "epoch": 2.907869481765835, "grad_norm": 0.4279378354549408, "learning_rate": 2.8729974441426557e-08, "loss": 0.3093, "step": 1515 }, { "epoch": 2.90978886756238, "grad_norm": 0.3989483714103699, "learning_rate": 2.754645073538287e-08, "loss": 0.3078, "step": 1516 }, { "epoch": 2.911708253358925, "grad_norm": 0.4374085068702698, "learning_rate": 2.6387752623883158e-08, "loss": 0.3071, "step": 1517 }, { "epoch": 2.9136276391554703, "grad_norm": 0.42013007402420044, "learning_rate": 2.525388589187272e-08, "loss": 0.3125, "step": 1518 }, { "epoch": 2.9155470249520152, "grad_norm": 0.40238597989082336, "learning_rate": 2.4144856200321587e-08, "loss": 0.3239, "step": 1519 }, { "epoch": 2.9174664107485606, "grad_norm": 0.42339786887168884, "learning_rate": 2.3060669086199526e-08, "loss": 0.2836, "step": 1520 }, { "epoch": 2.9193857965451055, "grad_norm": 0.40129730105400085, "learning_rate": 2.2001329962446082e-08, "loss": 0.3114, "step": 1521 }, { "epoch": 2.9213051823416505, "grad_norm": 0.4060543477535248, "learning_rate": 2.0966844117943364e-08, "loss": 0.3198, "step": 1522 }, { "epoch": 2.923224568138196, "grad_norm": 0.41776105761528015, "learning_rate": 1.9957216717491067e-08, "loss": 0.3077, "step": 1523 }, { "epoch": 2.925143953934741, "grad_norm": 0.41161978244781494, "learning_rate": 1.8972452801780395e-08, "loss": 0.3294, "step": 1524 }, { "epoch": 2.927063339731286, "grad_norm": 0.4154324233531952, "learning_rate": 1.8012557287367394e-08, "loss": 0.2978, "step": 1525 }, { "epoch": 2.928982725527831, "grad_norm": 0.39224931597709656, "learning_rate": 1.7077534966650767e-08, "loss": 0.3108, "step": 1526 }, { "epoch": 2.930902111324376, "grad_norm": 0.4290758967399597, "learning_rate": 1.616739050784577e-08, "loss": 0.3026, "step": 1527 }, { "epoch": 2.9328214971209214, "grad_norm": 0.41678497195243835, "learning_rate": 1.528212845496202e-08, "loss": 0.3228, "step": 1528 }, { "epoch": 2.9347408829174664, "grad_norm": 0.4215105175971985, "learning_rate": 1.4421753227780721e-08, "loss": 0.2979, "step": 1529 }, { "epoch": 2.9366602687140118, "grad_norm": 0.4166526794433594, "learning_rate": 1.3586269121833028e-08, "loss": 0.3068, "step": 1530 }, { "epoch": 2.9385796545105567, "grad_norm": 0.42150306701660156, "learning_rate": 1.2775680308376726e-08, "loss": 0.3037, "step": 1531 }, { "epoch": 2.9404990403071016, "grad_norm": 0.4140715003013611, "learning_rate": 1.1989990834378462e-08, "loss": 0.3431, "step": 1532 }, { "epoch": 2.9424184261036466, "grad_norm": 0.4016764461994171, "learning_rate": 1.1229204622489886e-08, "loss": 0.2923, "step": 1533 }, { "epoch": 2.944337811900192, "grad_norm": 0.41701170802116394, "learning_rate": 1.0493325471032101e-08, "loss": 0.3142, "step": 1534 }, { "epoch": 2.946257197696737, "grad_norm": 0.42441973090171814, "learning_rate": 9.782357053972902e-09, "loss": 0.2958, "step": 1535 }, { "epoch": 2.9481765834932823, "grad_norm": 0.4126746952533722, "learning_rate": 9.096302920911238e-09, "loss": 0.335, "step": 1536 }, { "epoch": 2.950095969289827, "grad_norm": 0.4156147241592407, "learning_rate": 8.435166497057223e-09, "loss": 0.3393, "step": 1537 }, { "epoch": 2.952015355086372, "grad_norm": 0.40464186668395996, "learning_rate": 7.79895108321771e-09, "loss": 0.3218, "step": 1538 }, { "epoch": 2.9539347408829175, "grad_norm": 0.42467615008354187, "learning_rate": 7.187659855776852e-09, "loss": 0.2946, "step": 1539 }, { "epoch": 2.9558541266794625, "grad_norm": 0.4129014313220978, "learning_rate": 6.6012958666827886e-09, "loss": 0.3372, "step": 1540 }, { "epoch": 2.957773512476008, "grad_norm": 0.41969311237335205, "learning_rate": 6.039862043430989e-09, "loss": 0.3109, "step": 1541 }, { "epoch": 2.9596928982725528, "grad_norm": 0.43095022439956665, "learning_rate": 5.503361189049261e-09, "loss": 0.2907, "step": 1542 }, { "epoch": 2.9616122840690977, "grad_norm": 0.407565712928772, "learning_rate": 4.991795982085546e-09, "loss": 0.3062, "step": 1543 }, { "epoch": 2.963531669865643, "grad_norm": 0.40961360931396484, "learning_rate": 4.505168976592922e-09, "loss": 0.3067, "step": 1544 }, { "epoch": 2.965451055662188, "grad_norm": 0.41725796461105347, "learning_rate": 4.043482602116844e-09, "loss": 0.2983, "step": 1545 }, { "epoch": 2.9673704414587334, "grad_norm": 0.4130740165710449, "learning_rate": 3.6067391636845915e-09, "loss": 0.3022, "step": 1546 }, { "epoch": 2.9692898272552783, "grad_norm": 0.4097277522087097, "learning_rate": 3.1949408417925043e-09, "loss": 0.3203, "step": 1547 }, { "epoch": 2.9712092130518233, "grad_norm": 0.40231823921203613, "learning_rate": 2.8080896923943223e-09, "loss": 0.3163, "step": 1548 }, { "epoch": 2.9731285988483687, "grad_norm": 0.4145434498786926, "learning_rate": 2.4461876468934164e-09, "loss": 0.3067, "step": 1549 }, { "epoch": 2.9750479846449136, "grad_norm": 0.4461151361465454, "learning_rate": 2.1092365121305745e-09, "loss": 0.3091, "step": 1550 }, { "epoch": 2.976967370441459, "grad_norm": 0.41766566038131714, "learning_rate": 1.797237970376231e-09, "loss": 0.3136, "step": 1551 }, { "epoch": 2.978886756238004, "grad_norm": 0.42846646904945374, "learning_rate": 1.5101935793226941e-09, "loss": 0.2927, "step": 1552 }, { "epoch": 2.980806142034549, "grad_norm": 0.4253104627132416, "learning_rate": 1.2481047720735995e-09, "loss": 0.32, "step": 1553 }, { "epoch": 2.982725527831094, "grad_norm": 0.3935542404651642, "learning_rate": 1.0109728571411348e-09, "loss": 0.32, "step": 1554 }, { "epoch": 2.984644913627639, "grad_norm": 0.4024253487586975, "learning_rate": 7.987990184354921e-10, "loss": 0.293, "step": 1555 }, { "epoch": 2.986564299424184, "grad_norm": 0.45921555161476135, "learning_rate": 6.115843152609824e-10, "loss": 0.3003, "step": 1556 }, { "epoch": 2.9884836852207295, "grad_norm": 0.41469457745552063, "learning_rate": 4.4932968231048426e-10, "loss": 0.3277, "step": 1557 }, { "epoch": 2.9904030710172744, "grad_norm": 0.3882313072681427, "learning_rate": 3.1203592966044805e-10, "loss": 0.3245, "step": 1558 }, { "epoch": 2.9923224568138194, "grad_norm": 0.3875919282436371, "learning_rate": 1.997037427675652e-10, "loss": 0.3104, "step": 1559 }, { "epoch": 2.9942418426103647, "grad_norm": 0.3982383608818054, "learning_rate": 1.1233368246321708e-10, "loss": 0.2986, "step": 1560 }, { "epoch": 2.9961612284069097, "grad_norm": 0.4724661111831665, "learning_rate": 4.992618495403001e-11, "loss": 0.3285, "step": 1561 }, { "epoch": 2.998080614203455, "grad_norm": 0.3885463774204254, "learning_rate": 1.248156181743454e-11, "loss": 0.3383, "step": 1562 }, { "epoch": 3.0, "grad_norm": 0.40294045209884644, "learning_rate": 0.0, "loss": 0.311, "step": 1563 }, { "epoch": 3.0, "step": 1563, "total_flos": 1.315734133464367e+18, "train_loss": 0.0, "train_runtime": 18.1709, "train_samples_per_second": 8254.775, "train_steps_per_second": 86.016 } ], "logging_steps": 1, "max_steps": 1563, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.315734133464367e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }