{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 4205, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0004756949606065111, "grad_norm": 17.125, "learning_rate": 3.80952380952381e-07, "loss": 3.0223777294158936, "step": 2 }, { "epoch": 0.0009513899212130222, "grad_norm": 18.875, "learning_rate": 1.142857142857143e-06, "loss": 3.0833311080932617, "step": 4 }, { "epoch": 0.0014270848818195333, "grad_norm": 17.5, "learning_rate": 1.904761904761905e-06, "loss": 3.0257577896118164, "step": 6 }, { "epoch": 0.0019027798424260443, "grad_norm": 14.375, "learning_rate": 2.666666666666667e-06, "loss": 2.894188642501831, "step": 8 }, { "epoch": 0.002378474803032555, "grad_norm": 11.6875, "learning_rate": 3.428571428571429e-06, "loss": 2.8602077960968018, "step": 10 }, { "epoch": 0.0028541697636390666, "grad_norm": 9.1875, "learning_rate": 4.190476190476191e-06, "loss": 2.8444406986236572, "step": 12 }, { "epoch": 0.0033298647242455777, "grad_norm": 5.75, "learning_rate": 4.952380952380953e-06, "loss": 2.7274436950683594, "step": 14 }, { "epoch": 0.0038055596848520887, "grad_norm": 3.96875, "learning_rate": 5.7142857142857145e-06, "loss": 2.6735000610351562, "step": 16 }, { "epoch": 0.0042812546454586, "grad_norm": 3.46875, "learning_rate": 6.476190476190477e-06, "loss": 2.7295286655426025, "step": 18 }, { "epoch": 0.00475694960606511, "grad_norm": 3.390625, "learning_rate": 7.238095238095239e-06, "loss": 2.674283027648926, "step": 20 }, { "epoch": 0.005232644566671622, "grad_norm": 2.375, "learning_rate": 8.000000000000001e-06, "loss": 2.604132652282715, "step": 22 }, { "epoch": 0.005708339527278133, "grad_norm": 1.984375, "learning_rate": 8.761904761904763e-06, "loss": 2.5608315467834473, "step": 24 }, { "epoch": 0.006184034487884644, "grad_norm": 1.6484375, "learning_rate": 9.523809523809525e-06, "loss": 2.4369235038757324, "step": 26 }, { "epoch": 0.006659729448491155, "grad_norm": 1.3125, "learning_rate": 1.0285714285714285e-05, "loss": 2.526822090148926, "step": 28 }, { "epoch": 0.007135424409097666, "grad_norm": 1.1015625, "learning_rate": 1.104761904761905e-05, "loss": 2.478888511657715, "step": 30 }, { "epoch": 0.007611119369704177, "grad_norm": 1.015625, "learning_rate": 1.180952380952381e-05, "loss": 2.4330101013183594, "step": 32 }, { "epoch": 0.008086814330310688, "grad_norm": 0.88671875, "learning_rate": 1.2571428571428572e-05, "loss": 2.412613868713379, "step": 34 }, { "epoch": 0.0085625092909172, "grad_norm": 0.828125, "learning_rate": 1.3333333333333333e-05, "loss": 2.4466609954833984, "step": 36 }, { "epoch": 0.009038204251523711, "grad_norm": 0.9140625, "learning_rate": 1.4095238095238097e-05, "loss": 2.3658487796783447, "step": 38 }, { "epoch": 0.00951389921213022, "grad_norm": 0.84375, "learning_rate": 1.4857142857142858e-05, "loss": 2.355985164642334, "step": 40 }, { "epoch": 0.009989594172736732, "grad_norm": 0.796875, "learning_rate": 1.5619047619047622e-05, "loss": 2.2941904067993164, "step": 42 }, { "epoch": 0.010465289133343244, "grad_norm": 0.80078125, "learning_rate": 1.6380952380952384e-05, "loss": 2.2010486125946045, "step": 44 }, { "epoch": 0.010940984093949755, "grad_norm": 0.890625, "learning_rate": 1.7142857142857142e-05, "loss": 2.2753114700317383, "step": 46 }, { "epoch": 0.011416679054556267, "grad_norm": 0.79296875, "learning_rate": 1.7904761904761907e-05, "loss": 2.248267650604248, "step": 48 }, { "epoch": 0.011892374015162776, "grad_norm": 0.76953125, "learning_rate": 1.866666666666667e-05, "loss": 2.219675064086914, "step": 50 }, { "epoch": 0.012368068975769288, "grad_norm": 0.77734375, "learning_rate": 1.942857142857143e-05, "loss": 2.195375919342041, "step": 52 }, { "epoch": 0.0128437639363758, "grad_norm": 0.8203125, "learning_rate": 2.0190476190476192e-05, "loss": 2.113194227218628, "step": 54 }, { "epoch": 0.01331945889698231, "grad_norm": 0.89453125, "learning_rate": 2.0952380952380954e-05, "loss": 2.1435773372650146, "step": 56 }, { "epoch": 0.01379515385758882, "grad_norm": 0.9609375, "learning_rate": 2.1714285714285715e-05, "loss": 2.1617753505706787, "step": 58 }, { "epoch": 0.014270848818195332, "grad_norm": 0.9140625, "learning_rate": 2.2476190476190477e-05, "loss": 2.0906386375427246, "step": 60 }, { "epoch": 0.014746543778801843, "grad_norm": 0.890625, "learning_rate": 2.3238095238095242e-05, "loss": 2.13519024848938, "step": 62 }, { "epoch": 0.015222238739408355, "grad_norm": 0.74609375, "learning_rate": 2.4e-05, "loss": 2.0451605319976807, "step": 64 }, { "epoch": 0.015697933700014866, "grad_norm": 0.703125, "learning_rate": 2.4761904761904766e-05, "loss": 2.18241548538208, "step": 66 }, { "epoch": 0.016173628660621376, "grad_norm": 0.734375, "learning_rate": 2.5523809523809524e-05, "loss": 2.0362777709960938, "step": 68 }, { "epoch": 0.01664932362122789, "grad_norm": 0.68359375, "learning_rate": 2.628571428571429e-05, "loss": 2.0474891662597656, "step": 70 }, { "epoch": 0.0171250185818344, "grad_norm": 0.81640625, "learning_rate": 2.704761904761905e-05, "loss": 2.0943374633789062, "step": 72 }, { "epoch": 0.01760071354244091, "grad_norm": 0.703125, "learning_rate": 2.780952380952381e-05, "loss": 2.007606029510498, "step": 74 }, { "epoch": 0.018076408503047422, "grad_norm": 0.79296875, "learning_rate": 2.8571428571428574e-05, "loss": 2.010784149169922, "step": 76 }, { "epoch": 0.01855210346365393, "grad_norm": 0.859375, "learning_rate": 2.9333333333333333e-05, "loss": 2.0787105560302734, "step": 78 }, { "epoch": 0.01902779842426044, "grad_norm": 0.78515625, "learning_rate": 3.0095238095238098e-05, "loss": 2.0407357215881348, "step": 80 }, { "epoch": 0.019503493384866954, "grad_norm": 0.75390625, "learning_rate": 3.085714285714286e-05, "loss": 1.9725031852722168, "step": 82 }, { "epoch": 0.019979188345473464, "grad_norm": 0.8671875, "learning_rate": 3.161904761904762e-05, "loss": 1.9424755573272705, "step": 84 }, { "epoch": 0.020454883306079977, "grad_norm": 0.9921875, "learning_rate": 3.2380952380952386e-05, "loss": 2.0050058364868164, "step": 86 }, { "epoch": 0.020930578266686487, "grad_norm": 0.7421875, "learning_rate": 3.314285714285715e-05, "loss": 1.9476414918899536, "step": 88 }, { "epoch": 0.021406273227292997, "grad_norm": 0.94140625, "learning_rate": 3.390476190476191e-05, "loss": 1.9908151626586914, "step": 90 }, { "epoch": 0.02188196818789951, "grad_norm": 0.81640625, "learning_rate": 3.466666666666667e-05, "loss": 1.9584457874298096, "step": 92 }, { "epoch": 0.02235766314850602, "grad_norm": 0.9609375, "learning_rate": 3.542857142857143e-05, "loss": 1.9743244647979736, "step": 94 }, { "epoch": 0.022833358109112533, "grad_norm": 1.1015625, "learning_rate": 3.6190476190476195e-05, "loss": 1.9396190643310547, "step": 96 }, { "epoch": 0.023309053069719043, "grad_norm": 0.93359375, "learning_rate": 3.6952380952380956e-05, "loss": 1.8824760913848877, "step": 98 }, { "epoch": 0.023784748030325552, "grad_norm": 0.859375, "learning_rate": 3.771428571428572e-05, "loss": 1.8970260620117188, "step": 100 }, { "epoch": 0.024260442990932066, "grad_norm": 0.7890625, "learning_rate": 3.847619047619048e-05, "loss": 1.9105536937713623, "step": 102 }, { "epoch": 0.024736137951538575, "grad_norm": 0.84765625, "learning_rate": 3.923809523809524e-05, "loss": 1.8593096733093262, "step": 104 }, { "epoch": 0.02521183291214509, "grad_norm": 0.76953125, "learning_rate": 4e-05, "loss": 1.8682916164398193, "step": 106 }, { "epoch": 0.0256875278727516, "grad_norm": 0.83984375, "learning_rate": 4.0761904761904765e-05, "loss": 1.9372856616973877, "step": 108 }, { "epoch": 0.026163222833358108, "grad_norm": 1.2890625, "learning_rate": 4.1523809523809533e-05, "loss": 1.9114850759506226, "step": 110 }, { "epoch": 0.02663891779396462, "grad_norm": 1.0625, "learning_rate": 4.228571428571429e-05, "loss": 1.8821630477905273, "step": 112 }, { "epoch": 0.02711461275457113, "grad_norm": 1.015625, "learning_rate": 4.304761904761905e-05, "loss": 1.8732749223709106, "step": 114 }, { "epoch": 0.02759030771517764, "grad_norm": 0.7890625, "learning_rate": 4.380952380952382e-05, "loss": 1.8635611534118652, "step": 116 }, { "epoch": 0.028066002675784154, "grad_norm": 0.90625, "learning_rate": 4.4571428571428574e-05, "loss": 1.8261184692382812, "step": 118 }, { "epoch": 0.028541697636390664, "grad_norm": 0.9140625, "learning_rate": 4.5333333333333335e-05, "loss": 1.8533995151519775, "step": 120 }, { "epoch": 0.029017392596997177, "grad_norm": 1.03125, "learning_rate": 4.60952380952381e-05, "loss": 1.816650390625, "step": 122 }, { "epoch": 0.029493087557603687, "grad_norm": 0.87890625, "learning_rate": 4.6857142857142865e-05, "loss": 1.823215365409851, "step": 124 }, { "epoch": 0.029968782518210196, "grad_norm": 0.83984375, "learning_rate": 4.761904761904762e-05, "loss": 1.8113462924957275, "step": 126 }, { "epoch": 0.03044447747881671, "grad_norm": 0.8046875, "learning_rate": 4.838095238095238e-05, "loss": 1.7880184650421143, "step": 128 }, { "epoch": 0.03092017243942322, "grad_norm": 0.97265625, "learning_rate": 4.914285714285715e-05, "loss": 1.8012118339538574, "step": 130 }, { "epoch": 0.03139586740002973, "grad_norm": 1.046875, "learning_rate": 4.990476190476191e-05, "loss": 1.783468246459961, "step": 132 }, { "epoch": 0.03187156236063624, "grad_norm": 1.015625, "learning_rate": 5.066666666666667e-05, "loss": 1.7474174499511719, "step": 134 }, { "epoch": 0.03234725732124275, "grad_norm": 0.95703125, "learning_rate": 5.1428571428571436e-05, "loss": 1.8491697311401367, "step": 136 }, { "epoch": 0.03282295228184926, "grad_norm": 0.984375, "learning_rate": 5.21904761904762e-05, "loss": 1.77945077419281, "step": 138 }, { "epoch": 0.03329864724245578, "grad_norm": 1.046875, "learning_rate": 5.295238095238095e-05, "loss": 1.7462689876556396, "step": 140 }, { "epoch": 0.03377434220306229, "grad_norm": 1.25, "learning_rate": 5.3714285714285714e-05, "loss": 1.77305006980896, "step": 142 }, { "epoch": 0.0342500371636688, "grad_norm": 0.91015625, "learning_rate": 5.447619047619048e-05, "loss": 1.7020612955093384, "step": 144 }, { "epoch": 0.03472573212427531, "grad_norm": 0.921875, "learning_rate": 5.5238095238095244e-05, "loss": 1.8065619468688965, "step": 146 }, { "epoch": 0.03520142708488182, "grad_norm": 0.8828125, "learning_rate": 5.6e-05, "loss": 1.6848450899124146, "step": 148 }, { "epoch": 0.035677122045488334, "grad_norm": 0.80859375, "learning_rate": 5.676190476190477e-05, "loss": 1.747304916381836, "step": 150 }, { "epoch": 0.036152817006094844, "grad_norm": 1.1015625, "learning_rate": 5.752380952380953e-05, "loss": 1.7690556049346924, "step": 152 }, { "epoch": 0.03662851196670135, "grad_norm": 1.03125, "learning_rate": 5.828571428571429e-05, "loss": 1.7610713243484497, "step": 154 }, { "epoch": 0.03710420692730786, "grad_norm": 1.1640625, "learning_rate": 5.904761904761905e-05, "loss": 1.6751768589019775, "step": 156 }, { "epoch": 0.03757990188791437, "grad_norm": 0.9375, "learning_rate": 5.9809523809523814e-05, "loss": 1.6568293571472168, "step": 158 }, { "epoch": 0.03805559684852088, "grad_norm": 0.84765625, "learning_rate": 6.0571428571428576e-05, "loss": 1.7163995504379272, "step": 160 }, { "epoch": 0.0385312918091274, "grad_norm": 1.1875, "learning_rate": 6.133333333333334e-05, "loss": 1.7115540504455566, "step": 162 }, { "epoch": 0.03900698676973391, "grad_norm": 0.9453125, "learning_rate": 6.20952380952381e-05, "loss": 1.6549824476242065, "step": 164 }, { "epoch": 0.03948268173034042, "grad_norm": 0.828125, "learning_rate": 6.285714285714286e-05, "loss": 1.6670048236846924, "step": 166 }, { "epoch": 0.03995837669094693, "grad_norm": 0.89453125, "learning_rate": 6.361904761904762e-05, "loss": 1.6787292957305908, "step": 168 }, { "epoch": 0.04043407165155344, "grad_norm": 0.87890625, "learning_rate": 6.438095238095238e-05, "loss": 1.6751407384872437, "step": 170 }, { "epoch": 0.040909766612159955, "grad_norm": 0.78125, "learning_rate": 6.514285714285715e-05, "loss": 1.679162621498108, "step": 172 }, { "epoch": 0.041385461572766465, "grad_norm": 0.75, "learning_rate": 6.590476190476191e-05, "loss": 1.6422595977783203, "step": 174 }, { "epoch": 0.041861156533372974, "grad_norm": 0.97265625, "learning_rate": 6.666666666666667e-05, "loss": 1.693905234336853, "step": 176 }, { "epoch": 0.042336851493979484, "grad_norm": 0.99609375, "learning_rate": 6.742857142857143e-05, "loss": 1.7319214344024658, "step": 178 }, { "epoch": 0.042812546454585994, "grad_norm": 1.1171875, "learning_rate": 6.81904761904762e-05, "loss": 1.7077994346618652, "step": 180 }, { "epoch": 0.04328824141519251, "grad_norm": 0.87890625, "learning_rate": 6.895238095238095e-05, "loss": 1.6633131504058838, "step": 182 }, { "epoch": 0.04376393637579902, "grad_norm": 1.3515625, "learning_rate": 6.971428571428572e-05, "loss": 1.6569929122924805, "step": 184 }, { "epoch": 0.04423963133640553, "grad_norm": 1.078125, "learning_rate": 7.047619047619048e-05, "loss": 1.6756895780563354, "step": 186 }, { "epoch": 0.04471532629701204, "grad_norm": 1.296875, "learning_rate": 7.123809523809524e-05, "loss": 1.7126249074935913, "step": 188 }, { "epoch": 0.04519102125761855, "grad_norm": 0.9609375, "learning_rate": 7.2e-05, "loss": 1.6484733819961548, "step": 190 }, { "epoch": 0.045666716218225066, "grad_norm": 1.1015625, "learning_rate": 7.276190476190476e-05, "loss": 1.71817147731781, "step": 192 }, { "epoch": 0.046142411178831576, "grad_norm": 0.96484375, "learning_rate": 7.352380952380953e-05, "loss": 1.7061476707458496, "step": 194 }, { "epoch": 0.046618106139438086, "grad_norm": 0.9375, "learning_rate": 7.42857142857143e-05, "loss": 1.654850959777832, "step": 196 }, { "epoch": 0.047093801100044595, "grad_norm": 0.8984375, "learning_rate": 7.504761904761905e-05, "loss": 1.6332194805145264, "step": 198 }, { "epoch": 0.047569496060651105, "grad_norm": 0.890625, "learning_rate": 7.580952380952381e-05, "loss": 1.6425645351409912, "step": 200 }, { "epoch": 0.04804519102125762, "grad_norm": 0.80078125, "learning_rate": 7.657142857142859e-05, "loss": 1.6112370491027832, "step": 202 }, { "epoch": 0.04852088598186413, "grad_norm": 0.875, "learning_rate": 7.733333333333333e-05, "loss": 1.6736791133880615, "step": 204 }, { "epoch": 0.04899658094247064, "grad_norm": 0.90234375, "learning_rate": 7.80952380952381e-05, "loss": 1.5582149028778076, "step": 206 }, { "epoch": 0.04947227590307715, "grad_norm": 0.859375, "learning_rate": 7.885714285714287e-05, "loss": 1.605231523513794, "step": 208 }, { "epoch": 0.04994797086368366, "grad_norm": 0.95703125, "learning_rate": 7.961904761904763e-05, "loss": 1.6272740364074707, "step": 210 }, { "epoch": 0.05042366582429018, "grad_norm": 1.0234375, "learning_rate": 8e-05, "loss": 1.6227126121520996, "step": 212 }, { "epoch": 0.05089936078489669, "grad_norm": 0.99609375, "learning_rate": 8e-05, "loss": 1.7201282978057861, "step": 214 }, { "epoch": 0.0513750557455032, "grad_norm": 0.8203125, "learning_rate": 8e-05, "loss": 1.671586036682129, "step": 216 }, { "epoch": 0.051850750706109706, "grad_norm": 0.82421875, "learning_rate": 8e-05, "loss": 1.596938133239746, "step": 218 }, { "epoch": 0.052326445666716216, "grad_norm": 0.71875, "learning_rate": 8e-05, "loss": 1.5416910648345947, "step": 220 }, { "epoch": 0.052802140627322726, "grad_norm": 0.93359375, "learning_rate": 8e-05, "loss": 1.6337580680847168, "step": 222 }, { "epoch": 0.05327783558792924, "grad_norm": 0.89453125, "learning_rate": 8e-05, "loss": 1.694180965423584, "step": 224 }, { "epoch": 0.05375353054853575, "grad_norm": 1.1015625, "learning_rate": 8e-05, "loss": 1.5831806659698486, "step": 226 }, { "epoch": 0.05422922550914226, "grad_norm": 1.328125, "learning_rate": 8e-05, "loss": 1.6646983623504639, "step": 228 }, { "epoch": 0.05470492046974877, "grad_norm": 1.53125, "learning_rate": 8e-05, "loss": 1.632063627243042, "step": 230 }, { "epoch": 0.05518061543035528, "grad_norm": 1.3515625, "learning_rate": 8e-05, "loss": 1.6186381578445435, "step": 232 }, { "epoch": 0.0556563103909618, "grad_norm": 0.9296875, "learning_rate": 8e-05, "loss": 1.5822536945343018, "step": 234 }, { "epoch": 0.05613200535156831, "grad_norm": 0.9765625, "learning_rate": 8e-05, "loss": 1.598821759223938, "step": 236 }, { "epoch": 0.05660770031217482, "grad_norm": 0.8203125, "learning_rate": 8e-05, "loss": 1.6583571434020996, "step": 238 }, { "epoch": 0.05708339527278133, "grad_norm": 0.94921875, "learning_rate": 8e-05, "loss": 1.6493302583694458, "step": 240 }, { "epoch": 0.05755909023338784, "grad_norm": 0.9296875, "learning_rate": 8e-05, "loss": 1.5849549770355225, "step": 242 }, { "epoch": 0.058034785193994354, "grad_norm": 0.71875, "learning_rate": 8e-05, "loss": 1.5187630653381348, "step": 244 }, { "epoch": 0.058510480154600863, "grad_norm": 0.8046875, "learning_rate": 8e-05, "loss": 1.6261816024780273, "step": 246 }, { "epoch": 0.05898617511520737, "grad_norm": 0.94140625, "learning_rate": 8e-05, "loss": 1.5440542697906494, "step": 248 }, { "epoch": 0.05946187007581388, "grad_norm": 0.80859375, "learning_rate": 8e-05, "loss": 1.5579140186309814, "step": 250 }, { "epoch": 0.05993756503642039, "grad_norm": 0.671875, "learning_rate": 8e-05, "loss": 1.5661745071411133, "step": 252 }, { "epoch": 0.06041325999702691, "grad_norm": 0.76171875, "learning_rate": 8e-05, "loss": 1.5916748046875, "step": 254 }, { "epoch": 0.06088895495763342, "grad_norm": 0.765625, "learning_rate": 8e-05, "loss": 1.582345724105835, "step": 256 }, { "epoch": 0.06136464991823993, "grad_norm": 0.78125, "learning_rate": 8e-05, "loss": 1.6371424198150635, "step": 258 }, { "epoch": 0.06184034487884644, "grad_norm": 0.7265625, "learning_rate": 8e-05, "loss": 1.5874426364898682, "step": 260 }, { "epoch": 0.06231603983945295, "grad_norm": 0.6796875, "learning_rate": 8e-05, "loss": 1.5973892211914062, "step": 262 }, { "epoch": 0.06279173480005946, "grad_norm": 0.859375, "learning_rate": 8e-05, "loss": 1.6360384225845337, "step": 264 }, { "epoch": 0.06326742976066597, "grad_norm": 0.703125, "learning_rate": 8e-05, "loss": 1.4682174921035767, "step": 266 }, { "epoch": 0.06374312472127248, "grad_norm": 0.69140625, "learning_rate": 8e-05, "loss": 1.5434261560440063, "step": 268 }, { "epoch": 0.064218819681879, "grad_norm": 0.671875, "learning_rate": 8e-05, "loss": 1.5429248809814453, "step": 270 }, { "epoch": 0.0646945146424855, "grad_norm": 0.6328125, "learning_rate": 8e-05, "loss": 1.520768404006958, "step": 272 }, { "epoch": 0.06517020960309201, "grad_norm": 0.63671875, "learning_rate": 8e-05, "loss": 1.6287932395935059, "step": 274 }, { "epoch": 0.06564590456369852, "grad_norm": 0.75, "learning_rate": 8e-05, "loss": 1.574143409729004, "step": 276 }, { "epoch": 0.06612159952430503, "grad_norm": 0.64453125, "learning_rate": 8e-05, "loss": 1.5749611854553223, "step": 278 }, { "epoch": 0.06659729448491156, "grad_norm": 0.66796875, "learning_rate": 8e-05, "loss": 1.511296033859253, "step": 280 }, { "epoch": 0.06707298944551807, "grad_norm": 0.6171875, "learning_rate": 8e-05, "loss": 1.5300962924957275, "step": 282 }, { "epoch": 0.06754868440612458, "grad_norm": 0.59375, "learning_rate": 8e-05, "loss": 1.5132012367248535, "step": 284 }, { "epoch": 0.06802437936673109, "grad_norm": 0.59375, "learning_rate": 8e-05, "loss": 1.5248513221740723, "step": 286 }, { "epoch": 0.0685000743273376, "grad_norm": 0.64453125, "learning_rate": 8e-05, "loss": 1.4714152812957764, "step": 288 }, { "epoch": 0.0689757692879441, "grad_norm": 0.5859375, "learning_rate": 8e-05, "loss": 1.536270022392273, "step": 290 }, { "epoch": 0.06945146424855062, "grad_norm": 0.66796875, "learning_rate": 8e-05, "loss": 1.5859988927841187, "step": 292 }, { "epoch": 0.06992715920915712, "grad_norm": 0.640625, "learning_rate": 8e-05, "loss": 1.6076054573059082, "step": 294 }, { "epoch": 0.07040285416976363, "grad_norm": 0.65234375, "learning_rate": 8e-05, "loss": 1.537914752960205, "step": 296 }, { "epoch": 0.07087854913037014, "grad_norm": 0.68359375, "learning_rate": 8e-05, "loss": 1.530918002128601, "step": 298 }, { "epoch": 0.07135424409097667, "grad_norm": 0.63671875, "learning_rate": 8e-05, "loss": 1.5722606182098389, "step": 300 }, { "epoch": 0.07182993905158318, "grad_norm": 0.6328125, "learning_rate": 8e-05, "loss": 1.5380263328552246, "step": 302 }, { "epoch": 0.07230563401218969, "grad_norm": 0.62109375, "learning_rate": 8e-05, "loss": 1.543945074081421, "step": 304 }, { "epoch": 0.0727813289727962, "grad_norm": 0.671875, "learning_rate": 8e-05, "loss": 1.4990713596343994, "step": 306 }, { "epoch": 0.0732570239334027, "grad_norm": 0.5703125, "learning_rate": 8e-05, "loss": 1.5049118995666504, "step": 308 }, { "epoch": 0.07373271889400922, "grad_norm": 0.546875, "learning_rate": 8e-05, "loss": 1.5481094121932983, "step": 310 }, { "epoch": 0.07420841385461573, "grad_norm": 0.6015625, "learning_rate": 8e-05, "loss": 1.5431925058364868, "step": 312 }, { "epoch": 0.07468410881522224, "grad_norm": 0.59375, "learning_rate": 8e-05, "loss": 1.5000509023666382, "step": 314 }, { "epoch": 0.07515980377582875, "grad_norm": 0.640625, "learning_rate": 8e-05, "loss": 1.453176498413086, "step": 316 }, { "epoch": 0.07563549873643526, "grad_norm": 0.59375, "learning_rate": 8e-05, "loss": 1.5792964696884155, "step": 318 }, { "epoch": 0.07611119369704177, "grad_norm": 0.6171875, "learning_rate": 8e-05, "loss": 1.5420498847961426, "step": 320 }, { "epoch": 0.07658688865764829, "grad_norm": 0.578125, "learning_rate": 8e-05, "loss": 1.5112196207046509, "step": 322 }, { "epoch": 0.0770625836182548, "grad_norm": 0.5859375, "learning_rate": 8e-05, "loss": 1.5195896625518799, "step": 324 }, { "epoch": 0.07753827857886131, "grad_norm": 0.578125, "learning_rate": 8e-05, "loss": 1.4650981426239014, "step": 326 }, { "epoch": 0.07801397353946782, "grad_norm": 0.6015625, "learning_rate": 8e-05, "loss": 1.4874310493469238, "step": 328 }, { "epoch": 0.07848966850007433, "grad_norm": 0.5546875, "learning_rate": 8e-05, "loss": 1.4700895547866821, "step": 330 }, { "epoch": 0.07896536346068084, "grad_norm": 0.59765625, "learning_rate": 8e-05, "loss": 1.4657363891601562, "step": 332 }, { "epoch": 0.07944105842128735, "grad_norm": 0.63671875, "learning_rate": 8e-05, "loss": 1.5102603435516357, "step": 334 }, { "epoch": 0.07991675338189386, "grad_norm": 0.63671875, "learning_rate": 8e-05, "loss": 1.48199462890625, "step": 336 }, { "epoch": 0.08039244834250037, "grad_norm": 0.58984375, "learning_rate": 8e-05, "loss": 1.573965072631836, "step": 338 }, { "epoch": 0.08086814330310688, "grad_norm": 0.5859375, "learning_rate": 8e-05, "loss": 1.552716851234436, "step": 340 }, { "epoch": 0.0813438382637134, "grad_norm": 0.59765625, "learning_rate": 8e-05, "loss": 1.5109150409698486, "step": 342 }, { "epoch": 0.08181953322431991, "grad_norm": 0.62890625, "learning_rate": 8e-05, "loss": 1.478667974472046, "step": 344 }, { "epoch": 0.08229522818492642, "grad_norm": 0.55859375, "learning_rate": 8e-05, "loss": 1.4951369762420654, "step": 346 }, { "epoch": 0.08277092314553293, "grad_norm": 0.5234375, "learning_rate": 8e-05, "loss": 1.4743764400482178, "step": 348 }, { "epoch": 0.08324661810613944, "grad_norm": 0.59375, "learning_rate": 8e-05, "loss": 1.5435149669647217, "step": 350 }, { "epoch": 0.08372231306674595, "grad_norm": 0.5546875, "learning_rate": 8e-05, "loss": 1.4307265281677246, "step": 352 }, { "epoch": 0.08419800802735246, "grad_norm": 0.60546875, "learning_rate": 8e-05, "loss": 1.5382444858551025, "step": 354 }, { "epoch": 0.08467370298795897, "grad_norm": 0.56640625, "learning_rate": 8e-05, "loss": 1.4578557014465332, "step": 356 }, { "epoch": 0.08514939794856548, "grad_norm": 0.5625, "learning_rate": 8e-05, "loss": 1.5319006443023682, "step": 358 }, { "epoch": 0.08562509290917199, "grad_norm": 0.52734375, "learning_rate": 8e-05, "loss": 1.511313796043396, "step": 360 }, { "epoch": 0.08610078786977851, "grad_norm": 0.55078125, "learning_rate": 8e-05, "loss": 1.4577925205230713, "step": 362 }, { "epoch": 0.08657648283038502, "grad_norm": 0.54296875, "learning_rate": 8e-05, "loss": 1.5221188068389893, "step": 364 }, { "epoch": 0.08705217779099153, "grad_norm": 0.515625, "learning_rate": 8e-05, "loss": 1.4162304401397705, "step": 366 }, { "epoch": 0.08752787275159804, "grad_norm": 0.53515625, "learning_rate": 8e-05, "loss": 1.4972211122512817, "step": 368 }, { "epoch": 0.08800356771220455, "grad_norm": 0.53515625, "learning_rate": 8e-05, "loss": 1.5569958686828613, "step": 370 }, { "epoch": 0.08847926267281106, "grad_norm": 0.54296875, "learning_rate": 8e-05, "loss": 1.467348337173462, "step": 372 }, { "epoch": 0.08895495763341757, "grad_norm": 0.53515625, "learning_rate": 8e-05, "loss": 1.4739539623260498, "step": 374 }, { "epoch": 0.08943065259402408, "grad_norm": 0.546875, "learning_rate": 8e-05, "loss": 1.478308916091919, "step": 376 }, { "epoch": 0.08990634755463059, "grad_norm": 0.5078125, "learning_rate": 8e-05, "loss": 1.4767718315124512, "step": 378 }, { "epoch": 0.0903820425152371, "grad_norm": 0.52734375, "learning_rate": 8e-05, "loss": 1.51509690284729, "step": 380 }, { "epoch": 0.09085773747584361, "grad_norm": 0.5234375, "learning_rate": 8e-05, "loss": 1.4501855373382568, "step": 382 }, { "epoch": 0.09133343243645013, "grad_norm": 0.5390625, "learning_rate": 8e-05, "loss": 1.45088529586792, "step": 384 }, { "epoch": 0.09180912739705664, "grad_norm": 0.52734375, "learning_rate": 8e-05, "loss": 1.4980132579803467, "step": 386 }, { "epoch": 0.09228482235766315, "grad_norm": 0.51953125, "learning_rate": 8e-05, "loss": 1.5103974342346191, "step": 388 }, { "epoch": 0.09276051731826966, "grad_norm": 0.58203125, "learning_rate": 8e-05, "loss": 1.4751368761062622, "step": 390 }, { "epoch": 0.09323621227887617, "grad_norm": 0.62109375, "learning_rate": 8e-05, "loss": 1.494489073753357, "step": 392 }, { "epoch": 0.09371190723948268, "grad_norm": 0.61328125, "learning_rate": 8e-05, "loss": 1.497837781906128, "step": 394 }, { "epoch": 0.09418760220008919, "grad_norm": 0.55859375, "learning_rate": 8e-05, "loss": 1.5514724254608154, "step": 396 }, { "epoch": 0.0946632971606957, "grad_norm": 0.86328125, "learning_rate": 8e-05, "loss": 1.5110323429107666, "step": 398 }, { "epoch": 0.09513899212130221, "grad_norm": 0.63671875, "learning_rate": 8e-05, "loss": 1.4153172969818115, "step": 400 }, { "epoch": 0.09561468708190872, "grad_norm": 0.51953125, "learning_rate": 8e-05, "loss": 1.4518225193023682, "step": 402 }, { "epoch": 0.09609038204251524, "grad_norm": 0.578125, "learning_rate": 8e-05, "loss": 1.4477956295013428, "step": 404 }, { "epoch": 0.09656607700312175, "grad_norm": 0.62109375, "learning_rate": 8e-05, "loss": 1.4921960830688477, "step": 406 }, { "epoch": 0.09704177196372826, "grad_norm": 0.6328125, "learning_rate": 8e-05, "loss": 1.4914698600769043, "step": 408 }, { "epoch": 0.09751746692433477, "grad_norm": 0.59375, "learning_rate": 8e-05, "loss": 1.4477533102035522, "step": 410 }, { "epoch": 0.09799316188494128, "grad_norm": 0.546875, "learning_rate": 8e-05, "loss": 1.499894142150879, "step": 412 }, { "epoch": 0.09846885684554779, "grad_norm": 0.5859375, "learning_rate": 8e-05, "loss": 1.5634784698486328, "step": 414 }, { "epoch": 0.0989445518061543, "grad_norm": 0.515625, "learning_rate": 8e-05, "loss": 1.4383997917175293, "step": 416 }, { "epoch": 0.09942024676676081, "grad_norm": 0.55859375, "learning_rate": 8e-05, "loss": 1.4373674392700195, "step": 418 }, { "epoch": 0.09989594172736732, "grad_norm": 0.5703125, "learning_rate": 8e-05, "loss": 1.475003719329834, "step": 420 }, { "epoch": 0.10037163668797383, "grad_norm": 0.5859375, "learning_rate": 8e-05, "loss": 1.4678364992141724, "step": 422 }, { "epoch": 0.10084733164858035, "grad_norm": 0.5390625, "learning_rate": 8e-05, "loss": 1.45807945728302, "step": 424 }, { "epoch": 0.10132302660918686, "grad_norm": 0.54296875, "learning_rate": 8e-05, "loss": 1.5057690143585205, "step": 426 }, { "epoch": 0.10179872156979337, "grad_norm": 0.50390625, "learning_rate": 8e-05, "loss": 1.4296057224273682, "step": 428 }, { "epoch": 0.10227441653039988, "grad_norm": 0.54296875, "learning_rate": 8e-05, "loss": 1.5102698802947998, "step": 430 }, { "epoch": 0.1027501114910064, "grad_norm": 0.5078125, "learning_rate": 8e-05, "loss": 1.37981116771698, "step": 432 }, { "epoch": 0.1032258064516129, "grad_norm": 0.52734375, "learning_rate": 8e-05, "loss": 1.4619908332824707, "step": 434 }, { "epoch": 0.10370150141221941, "grad_norm": 0.55859375, "learning_rate": 8e-05, "loss": 1.4256863594055176, "step": 436 }, { "epoch": 0.10417719637282592, "grad_norm": 0.6171875, "learning_rate": 8e-05, "loss": 1.4363038539886475, "step": 438 }, { "epoch": 0.10465289133343243, "grad_norm": 0.58203125, "learning_rate": 8e-05, "loss": 1.3884978294372559, "step": 440 }, { "epoch": 0.10512858629403894, "grad_norm": 0.62109375, "learning_rate": 8e-05, "loss": 1.4545469284057617, "step": 442 }, { "epoch": 0.10560428125464545, "grad_norm": 0.5703125, "learning_rate": 8e-05, "loss": 1.4439201354980469, "step": 444 }, { "epoch": 0.10607997621525198, "grad_norm": 0.58984375, "learning_rate": 8e-05, "loss": 1.5349268913269043, "step": 446 }, { "epoch": 0.10655567117585849, "grad_norm": 0.609375, "learning_rate": 8e-05, "loss": 1.591422438621521, "step": 448 }, { "epoch": 0.107031366136465, "grad_norm": 0.6640625, "learning_rate": 8e-05, "loss": 1.4320346117019653, "step": 450 }, { "epoch": 0.1075070610970715, "grad_norm": 0.62109375, "learning_rate": 8e-05, "loss": 1.4518539905548096, "step": 452 }, { "epoch": 0.10798275605767801, "grad_norm": 0.5859375, "learning_rate": 8e-05, "loss": 1.5056893825531006, "step": 454 }, { "epoch": 0.10845845101828452, "grad_norm": 0.54296875, "learning_rate": 8e-05, "loss": 1.3535287380218506, "step": 456 }, { "epoch": 0.10893414597889103, "grad_norm": 0.57421875, "learning_rate": 8e-05, "loss": 1.4125394821166992, "step": 458 }, { "epoch": 0.10940984093949754, "grad_norm": 0.51171875, "learning_rate": 8e-05, "loss": 1.3990814685821533, "step": 460 }, { "epoch": 0.10988553590010405, "grad_norm": 0.5546875, "learning_rate": 8e-05, "loss": 1.4865885972976685, "step": 462 }, { "epoch": 0.11036123086071056, "grad_norm": 0.5234375, "learning_rate": 8e-05, "loss": 1.415689468383789, "step": 464 }, { "epoch": 0.11083692582131709, "grad_norm": 0.53515625, "learning_rate": 8e-05, "loss": 1.453460931777954, "step": 466 }, { "epoch": 0.1113126207819236, "grad_norm": 0.56640625, "learning_rate": 8e-05, "loss": 1.4493913650512695, "step": 468 }, { "epoch": 0.1117883157425301, "grad_norm": 0.578125, "learning_rate": 8e-05, "loss": 1.4510160684585571, "step": 470 }, { "epoch": 0.11226401070313662, "grad_norm": 0.5859375, "learning_rate": 8e-05, "loss": 1.5191359519958496, "step": 472 }, { "epoch": 0.11273970566374313, "grad_norm": 0.59375, "learning_rate": 8e-05, "loss": 1.5257389545440674, "step": 474 }, { "epoch": 0.11321540062434964, "grad_norm": 0.54296875, "learning_rate": 8e-05, "loss": 1.4620857238769531, "step": 476 }, { "epoch": 0.11369109558495614, "grad_norm": 0.5390625, "learning_rate": 8e-05, "loss": 1.4049038887023926, "step": 478 }, { "epoch": 0.11416679054556265, "grad_norm": 0.54296875, "learning_rate": 8e-05, "loss": 1.387476921081543, "step": 480 }, { "epoch": 0.11464248550616916, "grad_norm": 0.53125, "learning_rate": 8e-05, "loss": 1.4375786781311035, "step": 482 }, { "epoch": 0.11511818046677567, "grad_norm": 0.5, "learning_rate": 8e-05, "loss": 1.4502665996551514, "step": 484 }, { "epoch": 0.1155938754273822, "grad_norm": 0.51171875, "learning_rate": 8e-05, "loss": 1.4172968864440918, "step": 486 }, { "epoch": 0.11606957038798871, "grad_norm": 0.51171875, "learning_rate": 8e-05, "loss": 1.3814518451690674, "step": 488 }, { "epoch": 0.11654526534859522, "grad_norm": 0.546875, "learning_rate": 8e-05, "loss": 1.4727611541748047, "step": 490 }, { "epoch": 0.11702096030920173, "grad_norm": 0.55859375, "learning_rate": 8e-05, "loss": 1.4043948650360107, "step": 492 }, { "epoch": 0.11749665526980824, "grad_norm": 0.53125, "learning_rate": 8e-05, "loss": 1.4327163696289062, "step": 494 }, { "epoch": 0.11797235023041475, "grad_norm": 0.578125, "learning_rate": 8e-05, "loss": 1.4427610635757446, "step": 496 }, { "epoch": 0.11844804519102126, "grad_norm": 0.5234375, "learning_rate": 8e-05, "loss": 1.4240474700927734, "step": 498 }, { "epoch": 0.11892374015162777, "grad_norm": 0.578125, "learning_rate": 8e-05, "loss": 1.471658706665039, "step": 500 }, { "epoch": 0.11939943511223428, "grad_norm": 0.63671875, "learning_rate": 8e-05, "loss": 1.4233098030090332, "step": 502 }, { "epoch": 0.11987513007284079, "grad_norm": 0.5859375, "learning_rate": 8e-05, "loss": 1.3341891765594482, "step": 504 }, { "epoch": 0.1203508250334473, "grad_norm": 0.5390625, "learning_rate": 8e-05, "loss": 1.3719563484191895, "step": 506 }, { "epoch": 0.12082651999405382, "grad_norm": 0.6171875, "learning_rate": 8e-05, "loss": 1.4592832326889038, "step": 508 }, { "epoch": 0.12130221495466033, "grad_norm": 0.6015625, "learning_rate": 8e-05, "loss": 1.4251080751419067, "step": 510 }, { "epoch": 0.12177790991526684, "grad_norm": 0.54296875, "learning_rate": 8e-05, "loss": 1.4636266231536865, "step": 512 }, { "epoch": 0.12225360487587335, "grad_norm": 0.5703125, "learning_rate": 8e-05, "loss": 1.3918344974517822, "step": 514 }, { "epoch": 0.12272929983647986, "grad_norm": 0.5625, "learning_rate": 8e-05, "loss": 1.4410995244979858, "step": 516 }, { "epoch": 0.12320499479708637, "grad_norm": 0.53125, "learning_rate": 8e-05, "loss": 1.4553332328796387, "step": 518 }, { "epoch": 0.12368068975769288, "grad_norm": 0.48828125, "learning_rate": 8e-05, "loss": 1.4932277202606201, "step": 520 }, { "epoch": 0.12415638471829939, "grad_norm": 0.5234375, "learning_rate": 8e-05, "loss": 1.4398219585418701, "step": 522 }, { "epoch": 0.1246320796789059, "grad_norm": 0.5234375, "learning_rate": 8e-05, "loss": 1.4100382328033447, "step": 524 }, { "epoch": 0.12510777463951242, "grad_norm": 0.52734375, "learning_rate": 8e-05, "loss": 1.4194281101226807, "step": 526 }, { "epoch": 0.12558346960011893, "grad_norm": 0.50390625, "learning_rate": 8e-05, "loss": 1.4007759094238281, "step": 528 }, { "epoch": 0.12605916456072544, "grad_norm": 0.515625, "learning_rate": 8e-05, "loss": 1.3943548202514648, "step": 530 }, { "epoch": 0.12653485952133195, "grad_norm": 0.5, "learning_rate": 8e-05, "loss": 1.3672170639038086, "step": 532 }, { "epoch": 0.12701055448193846, "grad_norm": 0.58203125, "learning_rate": 8e-05, "loss": 1.3937108516693115, "step": 534 }, { "epoch": 0.12748624944254497, "grad_norm": 0.59375, "learning_rate": 8e-05, "loss": 1.4582862854003906, "step": 536 }, { "epoch": 0.12796194440315148, "grad_norm": 0.52734375, "learning_rate": 8e-05, "loss": 1.417186975479126, "step": 538 }, { "epoch": 0.128437639363758, "grad_norm": 0.5, "learning_rate": 8e-05, "loss": 1.3944048881530762, "step": 540 }, { "epoch": 0.1289133343243645, "grad_norm": 0.515625, "learning_rate": 8e-05, "loss": 1.4348058700561523, "step": 542 }, { "epoch": 0.129389029284971, "grad_norm": 0.56640625, "learning_rate": 8e-05, "loss": 1.4025098085403442, "step": 544 }, { "epoch": 0.12986472424557752, "grad_norm": 0.5859375, "learning_rate": 8e-05, "loss": 1.4666318893432617, "step": 546 }, { "epoch": 0.13034041920618403, "grad_norm": 0.50390625, "learning_rate": 8e-05, "loss": 1.3959743976593018, "step": 548 }, { "epoch": 0.13081611416679054, "grad_norm": 0.515625, "learning_rate": 8e-05, "loss": 1.435917615890503, "step": 550 }, { "epoch": 0.13129180912739705, "grad_norm": 0.5234375, "learning_rate": 8e-05, "loss": 1.4515659809112549, "step": 552 }, { "epoch": 0.13176750408800356, "grad_norm": 0.51953125, "learning_rate": 8e-05, "loss": 1.4431695938110352, "step": 554 }, { "epoch": 0.13224319904861007, "grad_norm": 0.5703125, "learning_rate": 8e-05, "loss": 1.3696998357772827, "step": 556 }, { "epoch": 0.1327188940092166, "grad_norm": 0.515625, "learning_rate": 8e-05, "loss": 1.4764920473098755, "step": 558 }, { "epoch": 0.1331945889698231, "grad_norm": 0.484375, "learning_rate": 8e-05, "loss": 1.3894143104553223, "step": 560 }, { "epoch": 0.13367028393042962, "grad_norm": 0.51171875, "learning_rate": 8e-05, "loss": 1.4079980850219727, "step": 562 }, { "epoch": 0.13414597889103613, "grad_norm": 0.49609375, "learning_rate": 8e-05, "loss": 1.3896784782409668, "step": 564 }, { "epoch": 0.13462167385164264, "grad_norm": 0.498046875, "learning_rate": 8e-05, "loss": 1.4342916011810303, "step": 566 }, { "epoch": 0.13509736881224915, "grad_norm": 0.5078125, "learning_rate": 8e-05, "loss": 1.4243568181991577, "step": 568 }, { "epoch": 0.13557306377285566, "grad_norm": 0.47265625, "learning_rate": 8e-05, "loss": 1.4043259620666504, "step": 570 }, { "epoch": 0.13604875873346217, "grad_norm": 0.474609375, "learning_rate": 8e-05, "loss": 1.4198546409606934, "step": 572 }, { "epoch": 0.13652445369406868, "grad_norm": 0.453125, "learning_rate": 8e-05, "loss": 1.3509985208511353, "step": 574 }, { "epoch": 0.1370001486546752, "grad_norm": 0.48046875, "learning_rate": 8e-05, "loss": 1.3983509540557861, "step": 576 }, { "epoch": 0.1374758436152817, "grad_norm": 0.4921875, "learning_rate": 8e-05, "loss": 1.4067437648773193, "step": 578 }, { "epoch": 0.1379515385758882, "grad_norm": 0.515625, "learning_rate": 8e-05, "loss": 1.4863321781158447, "step": 580 }, { "epoch": 0.13842723353649472, "grad_norm": 0.48828125, "learning_rate": 8e-05, "loss": 1.4052914381027222, "step": 582 }, { "epoch": 0.13890292849710123, "grad_norm": 0.54296875, "learning_rate": 8e-05, "loss": 1.408928394317627, "step": 584 }, { "epoch": 0.13937862345770774, "grad_norm": 0.55859375, "learning_rate": 8e-05, "loss": 1.4460136890411377, "step": 586 }, { "epoch": 0.13985431841831425, "grad_norm": 0.51171875, "learning_rate": 8e-05, "loss": 1.4335639476776123, "step": 588 }, { "epoch": 0.14033001337892076, "grad_norm": 0.53125, "learning_rate": 8e-05, "loss": 1.3965034484863281, "step": 590 }, { "epoch": 0.14080570833952727, "grad_norm": 0.53125, "learning_rate": 8e-05, "loss": 1.4012255668640137, "step": 592 }, { "epoch": 0.14128140330013378, "grad_norm": 0.53515625, "learning_rate": 8e-05, "loss": 1.4261143207550049, "step": 594 }, { "epoch": 0.1417570982607403, "grad_norm": 0.515625, "learning_rate": 8e-05, "loss": 1.4008715152740479, "step": 596 }, { "epoch": 0.1422327932213468, "grad_norm": 0.52734375, "learning_rate": 8e-05, "loss": 1.4034451246261597, "step": 598 }, { "epoch": 0.14270848818195334, "grad_norm": 0.5, "learning_rate": 8e-05, "loss": 1.4082181453704834, "step": 600 }, { "epoch": 0.14318418314255985, "grad_norm": 0.48828125, "learning_rate": 8e-05, "loss": 1.3725682497024536, "step": 602 }, { "epoch": 0.14365987810316636, "grad_norm": 0.52734375, "learning_rate": 8e-05, "loss": 1.4293782711029053, "step": 604 }, { "epoch": 0.14413557306377286, "grad_norm": 0.5, "learning_rate": 8e-05, "loss": 1.4520360231399536, "step": 606 }, { "epoch": 0.14461126802437937, "grad_norm": 0.48828125, "learning_rate": 8e-05, "loss": 1.3525224924087524, "step": 608 }, { "epoch": 0.14508696298498588, "grad_norm": 0.5078125, "learning_rate": 8e-05, "loss": 1.4208955764770508, "step": 610 }, { "epoch": 0.1455626579455924, "grad_norm": 0.48828125, "learning_rate": 8e-05, "loss": 1.3334312438964844, "step": 612 }, { "epoch": 0.1460383529061989, "grad_norm": 0.53125, "learning_rate": 8e-05, "loss": 1.3503882884979248, "step": 614 }, { "epoch": 0.1465140478668054, "grad_norm": 0.54296875, "learning_rate": 8e-05, "loss": 1.4191619157791138, "step": 616 }, { "epoch": 0.14698974282741192, "grad_norm": 0.470703125, "learning_rate": 8e-05, "loss": 1.3381874561309814, "step": 618 }, { "epoch": 0.14746543778801843, "grad_norm": 0.48828125, "learning_rate": 8e-05, "loss": 1.3882290124893188, "step": 620 }, { "epoch": 0.14794113274862494, "grad_norm": 0.486328125, "learning_rate": 8e-05, "loss": 1.4173054695129395, "step": 622 }, { "epoch": 0.14841682770923145, "grad_norm": 0.49609375, "learning_rate": 8e-05, "loss": 1.3113012313842773, "step": 624 }, { "epoch": 0.14889252266983796, "grad_norm": 0.5390625, "learning_rate": 8e-05, "loss": 1.407628059387207, "step": 626 }, { "epoch": 0.14936821763044447, "grad_norm": 0.5390625, "learning_rate": 8e-05, "loss": 1.447212815284729, "step": 628 }, { "epoch": 0.14984391259105098, "grad_norm": 0.51171875, "learning_rate": 8e-05, "loss": 1.3606481552124023, "step": 630 }, { "epoch": 0.1503196075516575, "grad_norm": 0.55078125, "learning_rate": 8e-05, "loss": 1.4575624465942383, "step": 632 }, { "epoch": 0.150795302512264, "grad_norm": 0.56640625, "learning_rate": 8e-05, "loss": 1.355147123336792, "step": 634 }, { "epoch": 0.1512709974728705, "grad_norm": 0.498046875, "learning_rate": 8e-05, "loss": 1.37825345993042, "step": 636 }, { "epoch": 0.15174669243347702, "grad_norm": 0.50390625, "learning_rate": 8e-05, "loss": 1.4053802490234375, "step": 638 }, { "epoch": 0.15222238739408353, "grad_norm": 0.478515625, "learning_rate": 8e-05, "loss": 1.3817956447601318, "step": 640 }, { "epoch": 0.15269808235469007, "grad_norm": 0.498046875, "learning_rate": 8e-05, "loss": 1.3938934803009033, "step": 642 }, { "epoch": 0.15317377731529658, "grad_norm": 0.53125, "learning_rate": 8e-05, "loss": 1.35261869430542, "step": 644 }, { "epoch": 0.1536494722759031, "grad_norm": 0.51171875, "learning_rate": 8e-05, "loss": 1.3819756507873535, "step": 646 }, { "epoch": 0.1541251672365096, "grad_norm": 0.48828125, "learning_rate": 8e-05, "loss": 1.4077363014221191, "step": 648 }, { "epoch": 0.1546008621971161, "grad_norm": 0.50390625, "learning_rate": 8e-05, "loss": 1.4303150177001953, "step": 650 }, { "epoch": 0.15507655715772262, "grad_norm": 0.49609375, "learning_rate": 8e-05, "loss": 1.3727548122406006, "step": 652 }, { "epoch": 0.15555225211832913, "grad_norm": 0.4921875, "learning_rate": 8e-05, "loss": 1.4012013673782349, "step": 654 }, { "epoch": 0.15602794707893564, "grad_norm": 0.48046875, "learning_rate": 8e-05, "loss": 1.3778249025344849, "step": 656 }, { "epoch": 0.15650364203954215, "grad_norm": 0.470703125, "learning_rate": 8e-05, "loss": 1.384866714477539, "step": 658 }, { "epoch": 0.15697933700014866, "grad_norm": 0.51171875, "learning_rate": 8e-05, "loss": 1.4081860780715942, "step": 660 }, { "epoch": 0.15745503196075517, "grad_norm": 0.482421875, "learning_rate": 8e-05, "loss": 1.3876349925994873, "step": 662 }, { "epoch": 0.15793072692136167, "grad_norm": 0.52734375, "learning_rate": 8e-05, "loss": 1.420417070388794, "step": 664 }, { "epoch": 0.15840642188196818, "grad_norm": 0.50390625, "learning_rate": 8e-05, "loss": 1.427546501159668, "step": 666 }, { "epoch": 0.1588821168425747, "grad_norm": 0.484375, "learning_rate": 8e-05, "loss": 1.404707431793213, "step": 668 }, { "epoch": 0.1593578118031812, "grad_norm": 0.458984375, "learning_rate": 8e-05, "loss": 1.4167988300323486, "step": 670 }, { "epoch": 0.1598335067637877, "grad_norm": 0.482421875, "learning_rate": 8e-05, "loss": 1.36492919921875, "step": 672 }, { "epoch": 0.16030920172439422, "grad_norm": 0.546875, "learning_rate": 8e-05, "loss": 1.4290658235549927, "step": 674 }, { "epoch": 0.16078489668500073, "grad_norm": 0.50390625, "learning_rate": 8e-05, "loss": 1.3939204216003418, "step": 676 }, { "epoch": 0.16126059164560724, "grad_norm": 0.47265625, "learning_rate": 8e-05, "loss": 1.368532419204712, "step": 678 }, { "epoch": 0.16173628660621375, "grad_norm": 0.466796875, "learning_rate": 8e-05, "loss": 1.4039356708526611, "step": 680 }, { "epoch": 0.1622119815668203, "grad_norm": 0.453125, "learning_rate": 8e-05, "loss": 1.4631690979003906, "step": 682 }, { "epoch": 0.1626876765274268, "grad_norm": 0.458984375, "learning_rate": 8e-05, "loss": 1.3834668397903442, "step": 684 }, { "epoch": 0.1631633714880333, "grad_norm": 0.46484375, "learning_rate": 8e-05, "loss": 1.373947262763977, "step": 686 }, { "epoch": 0.16363906644863982, "grad_norm": 0.451171875, "learning_rate": 8e-05, "loss": 1.4164583683013916, "step": 688 }, { "epoch": 0.16411476140924633, "grad_norm": 0.474609375, "learning_rate": 8e-05, "loss": 1.4322106838226318, "step": 690 }, { "epoch": 0.16459045636985284, "grad_norm": 0.48828125, "learning_rate": 8e-05, "loss": 1.3455379009246826, "step": 692 }, { "epoch": 0.16506615133045935, "grad_norm": 0.482421875, "learning_rate": 8e-05, "loss": 1.34842050075531, "step": 694 }, { "epoch": 0.16554184629106586, "grad_norm": 0.490234375, "learning_rate": 8e-05, "loss": 1.428257942199707, "step": 696 }, { "epoch": 0.16601754125167237, "grad_norm": 0.5, "learning_rate": 8e-05, "loss": 1.417719841003418, "step": 698 }, { "epoch": 0.16649323621227888, "grad_norm": 0.453125, "learning_rate": 8e-05, "loss": 1.4131088256835938, "step": 700 }, { "epoch": 0.1669689311728854, "grad_norm": 0.5078125, "learning_rate": 8e-05, "loss": 1.356804370880127, "step": 702 }, { "epoch": 0.1674446261334919, "grad_norm": 0.5, "learning_rate": 8e-05, "loss": 1.355417251586914, "step": 704 }, { "epoch": 0.1679203210940984, "grad_norm": 0.54296875, "learning_rate": 8e-05, "loss": 1.4380789995193481, "step": 706 }, { "epoch": 0.16839601605470492, "grad_norm": 0.4921875, "learning_rate": 8e-05, "loss": 1.3789442777633667, "step": 708 }, { "epoch": 0.16887171101531143, "grad_norm": 0.474609375, "learning_rate": 8e-05, "loss": 1.3488481044769287, "step": 710 }, { "epoch": 0.16934740597591794, "grad_norm": 0.48828125, "learning_rate": 8e-05, "loss": 1.3990561962127686, "step": 712 }, { "epoch": 0.16982310093652445, "grad_norm": 0.490234375, "learning_rate": 8e-05, "loss": 1.3976104259490967, "step": 714 }, { "epoch": 0.17029879589713096, "grad_norm": 0.494140625, "learning_rate": 8e-05, "loss": 1.430433750152588, "step": 716 }, { "epoch": 0.17077449085773747, "grad_norm": 0.462890625, "learning_rate": 8e-05, "loss": 1.359434723854065, "step": 718 }, { "epoch": 0.17125018581834398, "grad_norm": 0.515625, "learning_rate": 8e-05, "loss": 1.4045766592025757, "step": 720 }, { "epoch": 0.17172588077895048, "grad_norm": 0.49609375, "learning_rate": 8e-05, "loss": 1.3606858253479004, "step": 722 }, { "epoch": 0.17220157573955702, "grad_norm": 0.466796875, "learning_rate": 8e-05, "loss": 1.4614171981811523, "step": 724 }, { "epoch": 0.17267727070016353, "grad_norm": 0.47265625, "learning_rate": 8e-05, "loss": 1.416619062423706, "step": 726 }, { "epoch": 0.17315296566077004, "grad_norm": 0.44921875, "learning_rate": 8e-05, "loss": 1.2924635410308838, "step": 728 }, { "epoch": 0.17362866062137655, "grad_norm": 0.47265625, "learning_rate": 8e-05, "loss": 1.3354673385620117, "step": 730 }, { "epoch": 0.17410435558198306, "grad_norm": 0.4765625, "learning_rate": 8e-05, "loss": 1.3578845262527466, "step": 732 }, { "epoch": 0.17458005054258957, "grad_norm": 0.49609375, "learning_rate": 8e-05, "loss": 1.4009724855422974, "step": 734 }, { "epoch": 0.17505574550319608, "grad_norm": 0.48046875, "learning_rate": 8e-05, "loss": 1.4139372110366821, "step": 736 }, { "epoch": 0.1755314404638026, "grad_norm": 0.45703125, "learning_rate": 8e-05, "loss": 1.3427128791809082, "step": 738 }, { "epoch": 0.1760071354244091, "grad_norm": 0.51171875, "learning_rate": 8e-05, "loss": 1.3915586471557617, "step": 740 }, { "epoch": 0.1764828303850156, "grad_norm": 0.48046875, "learning_rate": 8e-05, "loss": 1.3710131645202637, "step": 742 }, { "epoch": 0.17695852534562212, "grad_norm": 0.48046875, "learning_rate": 8e-05, "loss": 1.3700971603393555, "step": 744 }, { "epoch": 0.17743422030622863, "grad_norm": 0.486328125, "learning_rate": 8e-05, "loss": 1.3621227741241455, "step": 746 }, { "epoch": 0.17790991526683514, "grad_norm": 0.5, "learning_rate": 8e-05, "loss": 1.38692307472229, "step": 748 }, { "epoch": 0.17838561022744165, "grad_norm": 0.44921875, "learning_rate": 8e-05, "loss": 1.352332592010498, "step": 750 }, { "epoch": 0.17886130518804816, "grad_norm": 0.515625, "learning_rate": 8e-05, "loss": 1.4046599864959717, "step": 752 }, { "epoch": 0.17933700014865467, "grad_norm": 0.5390625, "learning_rate": 8e-05, "loss": 1.3857762813568115, "step": 754 }, { "epoch": 0.17981269510926118, "grad_norm": 0.546875, "learning_rate": 8e-05, "loss": 1.3184947967529297, "step": 756 }, { "epoch": 0.1802883900698677, "grad_norm": 0.5078125, "learning_rate": 8e-05, "loss": 1.3482776880264282, "step": 758 }, { "epoch": 0.1807640850304742, "grad_norm": 0.51953125, "learning_rate": 8e-05, "loss": 1.434415340423584, "step": 760 }, { "epoch": 0.1812397799910807, "grad_norm": 0.490234375, "learning_rate": 8e-05, "loss": 1.3801504373550415, "step": 762 }, { "epoch": 0.18171547495168722, "grad_norm": 0.486328125, "learning_rate": 8e-05, "loss": 1.3628723621368408, "step": 764 }, { "epoch": 0.18219116991229375, "grad_norm": 0.46484375, "learning_rate": 8e-05, "loss": 1.3805229663848877, "step": 766 }, { "epoch": 0.18266686487290026, "grad_norm": 0.494140625, "learning_rate": 8e-05, "loss": 1.3568819761276245, "step": 768 }, { "epoch": 0.18314255983350677, "grad_norm": 0.451171875, "learning_rate": 8e-05, "loss": 1.3049235343933105, "step": 770 }, { "epoch": 0.18361825479411328, "grad_norm": 0.46875, "learning_rate": 8e-05, "loss": 1.3783180713653564, "step": 772 }, { "epoch": 0.1840939497547198, "grad_norm": 0.48046875, "learning_rate": 8e-05, "loss": 1.3785371780395508, "step": 774 }, { "epoch": 0.1845696447153263, "grad_norm": 0.455078125, "learning_rate": 8e-05, "loss": 1.4072458744049072, "step": 776 }, { "epoch": 0.1850453396759328, "grad_norm": 0.47265625, "learning_rate": 8e-05, "loss": 1.3426545858383179, "step": 778 }, { "epoch": 0.18552103463653932, "grad_norm": 0.453125, "learning_rate": 8e-05, "loss": 1.352428674697876, "step": 780 }, { "epoch": 0.18599672959714583, "grad_norm": 0.46875, "learning_rate": 8e-05, "loss": 1.3136948347091675, "step": 782 }, { "epoch": 0.18647242455775234, "grad_norm": 0.5, "learning_rate": 8e-05, "loss": 1.3649238348007202, "step": 784 }, { "epoch": 0.18694811951835885, "grad_norm": 0.50390625, "learning_rate": 8e-05, "loss": 1.4003939628601074, "step": 786 }, { "epoch": 0.18742381447896536, "grad_norm": 0.470703125, "learning_rate": 8e-05, "loss": 1.3522775173187256, "step": 788 }, { "epoch": 0.18789950943957187, "grad_norm": 0.478515625, "learning_rate": 8e-05, "loss": 1.353920578956604, "step": 790 }, { "epoch": 0.18837520440017838, "grad_norm": 0.458984375, "learning_rate": 8e-05, "loss": 1.4120471477508545, "step": 792 }, { "epoch": 0.1888508993607849, "grad_norm": 0.451171875, "learning_rate": 8e-05, "loss": 1.425047755241394, "step": 794 }, { "epoch": 0.1893265943213914, "grad_norm": 0.4375, "learning_rate": 8e-05, "loss": 1.3698722124099731, "step": 796 }, { "epoch": 0.1898022892819979, "grad_norm": 0.451171875, "learning_rate": 8e-05, "loss": 1.3825695514678955, "step": 798 }, { "epoch": 0.19027798424260442, "grad_norm": 0.466796875, "learning_rate": 8e-05, "loss": 1.384330153465271, "step": 800 }, { "epoch": 0.19075367920321093, "grad_norm": 0.4765625, "learning_rate": 8e-05, "loss": 1.365710735321045, "step": 802 }, { "epoch": 0.19122937416381744, "grad_norm": 0.466796875, "learning_rate": 8e-05, "loss": 1.351928472518921, "step": 804 }, { "epoch": 0.19170506912442398, "grad_norm": 0.458984375, "learning_rate": 8e-05, "loss": 1.364558458328247, "step": 806 }, { "epoch": 0.1921807640850305, "grad_norm": 0.466796875, "learning_rate": 8e-05, "loss": 1.4033458232879639, "step": 808 }, { "epoch": 0.192656459045637, "grad_norm": 0.494140625, "learning_rate": 8e-05, "loss": 1.378347635269165, "step": 810 }, { "epoch": 0.1931321540062435, "grad_norm": 0.48828125, "learning_rate": 8e-05, "loss": 1.364283561706543, "step": 812 }, { "epoch": 0.19360784896685002, "grad_norm": 0.53125, "learning_rate": 8e-05, "loss": 1.414649248123169, "step": 814 }, { "epoch": 0.19408354392745653, "grad_norm": 0.4453125, "learning_rate": 8e-05, "loss": 1.3634638786315918, "step": 816 }, { "epoch": 0.19455923888806304, "grad_norm": 0.55859375, "learning_rate": 8e-05, "loss": 1.4743528366088867, "step": 818 }, { "epoch": 0.19503493384866954, "grad_norm": 0.52734375, "learning_rate": 8e-05, "loss": 1.3400163650512695, "step": 820 }, { "epoch": 0.19551062880927605, "grad_norm": 0.48828125, "learning_rate": 8e-05, "loss": 1.3880252838134766, "step": 822 }, { "epoch": 0.19598632376988256, "grad_norm": 0.5, "learning_rate": 8e-05, "loss": 1.395135521888733, "step": 824 }, { "epoch": 0.19646201873048907, "grad_norm": 0.4609375, "learning_rate": 8e-05, "loss": 1.3433012962341309, "step": 826 }, { "epoch": 0.19693771369109558, "grad_norm": 0.462890625, "learning_rate": 8e-05, "loss": 1.317229986190796, "step": 828 }, { "epoch": 0.1974134086517021, "grad_norm": 0.431640625, "learning_rate": 8e-05, "loss": 1.3179906606674194, "step": 830 }, { "epoch": 0.1978891036123086, "grad_norm": 0.451171875, "learning_rate": 8e-05, "loss": 1.3207850456237793, "step": 832 }, { "epoch": 0.1983647985729151, "grad_norm": 0.484375, "learning_rate": 8e-05, "loss": 1.4160897731781006, "step": 834 }, { "epoch": 0.19884049353352162, "grad_norm": 0.51953125, "learning_rate": 8e-05, "loss": 1.34122633934021, "step": 836 }, { "epoch": 0.19931618849412813, "grad_norm": 0.486328125, "learning_rate": 8e-05, "loss": 1.3531912565231323, "step": 838 }, { "epoch": 0.19979188345473464, "grad_norm": 0.482421875, "learning_rate": 8e-05, "loss": 1.3703558444976807, "step": 840 }, { "epoch": 0.20026757841534115, "grad_norm": 0.46875, "learning_rate": 8e-05, "loss": 1.3876454830169678, "step": 842 }, { "epoch": 0.20074327337594766, "grad_norm": 0.453125, "learning_rate": 8e-05, "loss": 1.3795206546783447, "step": 844 }, { "epoch": 0.20121896833655417, "grad_norm": 0.482421875, "learning_rate": 8e-05, "loss": 1.3170604705810547, "step": 846 }, { "epoch": 0.2016946632971607, "grad_norm": 0.5078125, "learning_rate": 8e-05, "loss": 1.3548598289489746, "step": 848 }, { "epoch": 0.20217035825776722, "grad_norm": 0.5546875, "learning_rate": 8e-05, "loss": 1.359254002571106, "step": 850 }, { "epoch": 0.20264605321837373, "grad_norm": 0.46484375, "learning_rate": 8e-05, "loss": 1.389087438583374, "step": 852 }, { "epoch": 0.20312174817898024, "grad_norm": 0.447265625, "learning_rate": 8e-05, "loss": 1.3296732902526855, "step": 854 }, { "epoch": 0.20359744313958675, "grad_norm": 0.486328125, "learning_rate": 8e-05, "loss": 1.3714617490768433, "step": 856 }, { "epoch": 0.20407313810019326, "grad_norm": 0.4765625, "learning_rate": 8e-05, "loss": 1.3371829986572266, "step": 858 }, { "epoch": 0.20454883306079977, "grad_norm": 0.498046875, "learning_rate": 8e-05, "loss": 1.386389970779419, "step": 860 }, { "epoch": 0.20502452802140628, "grad_norm": 0.46484375, "learning_rate": 8e-05, "loss": 1.338608741760254, "step": 862 }, { "epoch": 0.2055002229820128, "grad_norm": 0.515625, "learning_rate": 8e-05, "loss": 1.328315019607544, "step": 864 }, { "epoch": 0.2059759179426193, "grad_norm": 0.4921875, "learning_rate": 8e-05, "loss": 1.3264660835266113, "step": 866 }, { "epoch": 0.2064516129032258, "grad_norm": 0.484375, "learning_rate": 8e-05, "loss": 1.4002896547317505, "step": 868 }, { "epoch": 0.20692730786383232, "grad_norm": 0.5, "learning_rate": 8e-05, "loss": 1.3713188171386719, "step": 870 }, { "epoch": 0.20740300282443883, "grad_norm": 0.5078125, "learning_rate": 8e-05, "loss": 1.350874900817871, "step": 872 }, { "epoch": 0.20787869778504534, "grad_norm": 0.5078125, "learning_rate": 8e-05, "loss": 1.3784689903259277, "step": 874 }, { "epoch": 0.20835439274565185, "grad_norm": 0.59765625, "learning_rate": 8e-05, "loss": 1.3428910970687866, "step": 876 }, { "epoch": 0.20883008770625835, "grad_norm": 0.48828125, "learning_rate": 8e-05, "loss": 1.3714317083358765, "step": 878 }, { "epoch": 0.20930578266686486, "grad_norm": 0.494140625, "learning_rate": 8e-05, "loss": 1.3759992122650146, "step": 880 }, { "epoch": 0.20978147762747137, "grad_norm": 0.484375, "learning_rate": 8e-05, "loss": 1.2990326881408691, "step": 882 }, { "epoch": 0.21025717258807788, "grad_norm": 0.462890625, "learning_rate": 8e-05, "loss": 1.3624963760375977, "step": 884 }, { "epoch": 0.2107328675486844, "grad_norm": 0.44140625, "learning_rate": 8e-05, "loss": 1.3324933052062988, "step": 886 }, { "epoch": 0.2112085625092909, "grad_norm": 0.44921875, "learning_rate": 8e-05, "loss": 1.4013808965682983, "step": 888 }, { "epoch": 0.21168425746989744, "grad_norm": 0.4765625, "learning_rate": 8e-05, "loss": 1.338510274887085, "step": 890 }, { "epoch": 0.21215995243050395, "grad_norm": 0.5078125, "learning_rate": 8e-05, "loss": 1.4126381874084473, "step": 892 }, { "epoch": 0.21263564739111046, "grad_norm": 0.484375, "learning_rate": 8e-05, "loss": 1.371493935585022, "step": 894 }, { "epoch": 0.21311134235171697, "grad_norm": 0.486328125, "learning_rate": 8e-05, "loss": 1.383441686630249, "step": 896 }, { "epoch": 0.21358703731232348, "grad_norm": 0.51953125, "learning_rate": 8e-05, "loss": 1.3659964799880981, "step": 898 }, { "epoch": 0.21406273227293, "grad_norm": 0.484375, "learning_rate": 8e-05, "loss": 1.2908456325531006, "step": 900 }, { "epoch": 0.2145384272335365, "grad_norm": 0.498046875, "learning_rate": 8e-05, "loss": 1.3610074520111084, "step": 902 }, { "epoch": 0.215014122194143, "grad_norm": 0.48828125, "learning_rate": 8e-05, "loss": 1.3580766916275024, "step": 904 }, { "epoch": 0.21548981715474952, "grad_norm": 0.4921875, "learning_rate": 8e-05, "loss": 1.458742618560791, "step": 906 }, { "epoch": 0.21596551211535603, "grad_norm": 0.4921875, "learning_rate": 8e-05, "loss": 1.2720565795898438, "step": 908 }, { "epoch": 0.21644120707596254, "grad_norm": 0.44140625, "learning_rate": 8e-05, "loss": 1.3226542472839355, "step": 910 }, { "epoch": 0.21691690203656905, "grad_norm": 0.46875, "learning_rate": 8e-05, "loss": 1.3100987672805786, "step": 912 }, { "epoch": 0.21739259699717556, "grad_norm": 0.482421875, "learning_rate": 8e-05, "loss": 1.3754308223724365, "step": 914 }, { "epoch": 0.21786829195778207, "grad_norm": 0.53515625, "learning_rate": 8e-05, "loss": 1.3694303035736084, "step": 916 }, { "epoch": 0.21834398691838858, "grad_norm": 0.515625, "learning_rate": 8e-05, "loss": 1.394423007965088, "step": 918 }, { "epoch": 0.2188196818789951, "grad_norm": 0.4609375, "learning_rate": 8e-05, "loss": 1.3577532768249512, "step": 920 }, { "epoch": 0.2192953768396016, "grad_norm": 0.453125, "learning_rate": 8e-05, "loss": 1.2522318363189697, "step": 922 }, { "epoch": 0.2197710718002081, "grad_norm": 0.466796875, "learning_rate": 8e-05, "loss": 1.3532583713531494, "step": 924 }, { "epoch": 0.22024676676081462, "grad_norm": 0.5078125, "learning_rate": 8e-05, "loss": 1.3816845417022705, "step": 926 }, { "epoch": 0.22072246172142113, "grad_norm": 0.494140625, "learning_rate": 8e-05, "loss": 1.362253189086914, "step": 928 }, { "epoch": 0.22119815668202766, "grad_norm": 0.494140625, "learning_rate": 8e-05, "loss": 1.3231050968170166, "step": 930 }, { "epoch": 0.22167385164263417, "grad_norm": 0.486328125, "learning_rate": 8e-05, "loss": 1.3791143894195557, "step": 932 }, { "epoch": 0.22214954660324068, "grad_norm": 0.470703125, "learning_rate": 8e-05, "loss": 1.3647040128707886, "step": 934 }, { "epoch": 0.2226252415638472, "grad_norm": 0.45703125, "learning_rate": 8e-05, "loss": 1.3556348085403442, "step": 936 }, { "epoch": 0.2231009365244537, "grad_norm": 0.435546875, "learning_rate": 8e-05, "loss": 1.3153495788574219, "step": 938 }, { "epoch": 0.2235766314850602, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.2898294925689697, "step": 940 }, { "epoch": 0.22405232644566672, "grad_norm": 0.419921875, "learning_rate": 8e-05, "loss": 1.3297260999679565, "step": 942 }, { "epoch": 0.22452802140627323, "grad_norm": 0.4375, "learning_rate": 8e-05, "loss": 1.3505053520202637, "step": 944 }, { "epoch": 0.22500371636687974, "grad_norm": 0.439453125, "learning_rate": 8e-05, "loss": 1.3983497619628906, "step": 946 }, { "epoch": 0.22547941132748625, "grad_norm": 0.431640625, "learning_rate": 8e-05, "loss": 1.3593679666519165, "step": 948 }, { "epoch": 0.22595510628809276, "grad_norm": 0.4296875, "learning_rate": 8e-05, "loss": 1.336477518081665, "step": 950 }, { "epoch": 0.22643080124869927, "grad_norm": 0.453125, "learning_rate": 8e-05, "loss": 1.3708462715148926, "step": 952 }, { "epoch": 0.22690649620930578, "grad_norm": 0.439453125, "learning_rate": 8e-05, "loss": 1.4065918922424316, "step": 954 }, { "epoch": 0.2273821911699123, "grad_norm": 0.484375, "learning_rate": 8e-05, "loss": 1.361413836479187, "step": 956 }, { "epoch": 0.2278578861305188, "grad_norm": 0.49609375, "learning_rate": 8e-05, "loss": 1.3337655067443848, "step": 958 }, { "epoch": 0.2283335810911253, "grad_norm": 0.443359375, "learning_rate": 8e-05, "loss": 1.2802634239196777, "step": 960 }, { "epoch": 0.22880927605173182, "grad_norm": 0.46484375, "learning_rate": 8e-05, "loss": 1.3333477973937988, "step": 962 }, { "epoch": 0.22928497101233833, "grad_norm": 0.48046875, "learning_rate": 8e-05, "loss": 1.3998594284057617, "step": 964 }, { "epoch": 0.22976066597294484, "grad_norm": 0.439453125, "learning_rate": 8e-05, "loss": 1.3377106189727783, "step": 966 }, { "epoch": 0.23023636093355135, "grad_norm": 0.458984375, "learning_rate": 8e-05, "loss": 1.2901934385299683, "step": 968 }, { "epoch": 0.23071205589415786, "grad_norm": 0.498046875, "learning_rate": 8e-05, "loss": 1.3435245752334595, "step": 970 }, { "epoch": 0.2311877508547644, "grad_norm": 0.486328125, "learning_rate": 8e-05, "loss": 1.3464173078536987, "step": 972 }, { "epoch": 0.2316634458153709, "grad_norm": 0.453125, "learning_rate": 8e-05, "loss": 1.3196808099746704, "step": 974 }, { "epoch": 0.23213914077597742, "grad_norm": 0.462890625, "learning_rate": 8e-05, "loss": 1.3490209579467773, "step": 976 }, { "epoch": 0.23261483573658392, "grad_norm": 0.474609375, "learning_rate": 8e-05, "loss": 1.3755543231964111, "step": 978 }, { "epoch": 0.23309053069719043, "grad_norm": 0.455078125, "learning_rate": 8e-05, "loss": 1.3209686279296875, "step": 980 }, { "epoch": 0.23356622565779694, "grad_norm": 0.474609375, "learning_rate": 8e-05, "loss": 1.3944644927978516, "step": 982 }, { "epoch": 0.23404192061840345, "grad_norm": 0.455078125, "learning_rate": 8e-05, "loss": 1.3737695217132568, "step": 984 }, { "epoch": 0.23451761557900996, "grad_norm": 0.447265625, "learning_rate": 8e-05, "loss": 1.352348804473877, "step": 986 }, { "epoch": 0.23499331053961647, "grad_norm": 0.42578125, "learning_rate": 8e-05, "loss": 1.2491270303726196, "step": 988 }, { "epoch": 0.23546900550022298, "grad_norm": 0.478515625, "learning_rate": 8e-05, "loss": 1.4017226696014404, "step": 990 }, { "epoch": 0.2359447004608295, "grad_norm": 0.46875, "learning_rate": 8e-05, "loss": 1.3329687118530273, "step": 992 }, { "epoch": 0.236420395421436, "grad_norm": 0.44921875, "learning_rate": 8e-05, "loss": 1.2956058979034424, "step": 994 }, { "epoch": 0.2368960903820425, "grad_norm": 0.462890625, "learning_rate": 8e-05, "loss": 1.376798391342163, "step": 996 }, { "epoch": 0.23737178534264902, "grad_norm": 0.486328125, "learning_rate": 8e-05, "loss": 1.3533029556274414, "step": 998 }, { "epoch": 0.23784748030325553, "grad_norm": 0.4453125, "learning_rate": 8e-05, "loss": 1.3347084522247314, "step": 1000 }, { "epoch": 0.23832317526386204, "grad_norm": 0.4296875, "learning_rate": 8e-05, "loss": 1.3693647384643555, "step": 1002 }, { "epoch": 0.23879887022446855, "grad_norm": 0.4453125, "learning_rate": 8e-05, "loss": 1.3653826713562012, "step": 1004 }, { "epoch": 0.23927456518507506, "grad_norm": 0.427734375, "learning_rate": 8e-05, "loss": 1.345597505569458, "step": 1006 }, { "epoch": 0.23975026014568157, "grad_norm": 0.443359375, "learning_rate": 8e-05, "loss": 1.2825236320495605, "step": 1008 }, { "epoch": 0.24022595510628808, "grad_norm": 0.44140625, "learning_rate": 8e-05, "loss": 1.3502631187438965, "step": 1010 }, { "epoch": 0.2407016500668946, "grad_norm": 0.41015625, "learning_rate": 8e-05, "loss": 1.3079496622085571, "step": 1012 }, { "epoch": 0.24117734502750113, "grad_norm": 0.423828125, "learning_rate": 8e-05, "loss": 1.3474180698394775, "step": 1014 }, { "epoch": 0.24165303998810764, "grad_norm": 0.4765625, "learning_rate": 8e-05, "loss": 1.3570088148117065, "step": 1016 }, { "epoch": 0.24212873494871415, "grad_norm": 0.466796875, "learning_rate": 8e-05, "loss": 1.3702566623687744, "step": 1018 }, { "epoch": 0.24260442990932066, "grad_norm": 0.453125, "learning_rate": 8e-05, "loss": 1.3773030042648315, "step": 1020 }, { "epoch": 0.24308012486992717, "grad_norm": 0.470703125, "learning_rate": 8e-05, "loss": 1.333245873451233, "step": 1022 }, { "epoch": 0.24355581983053368, "grad_norm": 0.4609375, "learning_rate": 8e-05, "loss": 1.3305965662002563, "step": 1024 }, { "epoch": 0.24403151479114019, "grad_norm": 0.435546875, "learning_rate": 8e-05, "loss": 1.3149254322052002, "step": 1026 }, { "epoch": 0.2445072097517467, "grad_norm": 0.423828125, "learning_rate": 8e-05, "loss": 1.34800124168396, "step": 1028 }, { "epoch": 0.2449829047123532, "grad_norm": 0.443359375, "learning_rate": 8e-05, "loss": 1.3632348775863647, "step": 1030 }, { "epoch": 0.24545859967295972, "grad_norm": 0.482421875, "learning_rate": 8e-05, "loss": 1.3612074851989746, "step": 1032 }, { "epoch": 0.24593429463356622, "grad_norm": 0.439453125, "learning_rate": 8e-05, "loss": 1.3488757610321045, "step": 1034 }, { "epoch": 0.24640998959417273, "grad_norm": 0.451171875, "learning_rate": 8e-05, "loss": 1.3559046983718872, "step": 1036 }, { "epoch": 0.24688568455477924, "grad_norm": 0.453125, "learning_rate": 8e-05, "loss": 1.3708908557891846, "step": 1038 }, { "epoch": 0.24736137951538575, "grad_norm": 0.44140625, "learning_rate": 8e-05, "loss": 1.33760666847229, "step": 1040 }, { "epoch": 0.24783707447599226, "grad_norm": 0.427734375, "learning_rate": 8e-05, "loss": 1.2863125801086426, "step": 1042 }, { "epoch": 0.24831276943659877, "grad_norm": 0.435546875, "learning_rate": 8e-05, "loss": 1.3174580335617065, "step": 1044 }, { "epoch": 0.24878846439720528, "grad_norm": 0.45703125, "learning_rate": 8e-05, "loss": 1.3641953468322754, "step": 1046 }, { "epoch": 0.2492641593578118, "grad_norm": 0.443359375, "learning_rate": 8e-05, "loss": 1.3469069004058838, "step": 1048 }, { "epoch": 0.2497398543184183, "grad_norm": 0.455078125, "learning_rate": 8e-05, "loss": 1.3538458347320557, "step": 1050 }, { "epoch": 0.25021554927902484, "grad_norm": 0.48046875, "learning_rate": 8e-05, "loss": 1.354750633239746, "step": 1052 }, { "epoch": 0.25069124423963135, "grad_norm": 0.4609375, "learning_rate": 8e-05, "loss": 1.3567293882369995, "step": 1054 }, { "epoch": 0.25116693920023786, "grad_norm": 0.4921875, "learning_rate": 8e-05, "loss": 1.3444650173187256, "step": 1056 }, { "epoch": 0.25164263416084437, "grad_norm": 0.4296875, "learning_rate": 8e-05, "loss": 1.3146984577178955, "step": 1058 }, { "epoch": 0.2521183291214509, "grad_norm": 0.486328125, "learning_rate": 8e-05, "loss": 1.3101708889007568, "step": 1060 }, { "epoch": 0.2525940240820574, "grad_norm": 0.48046875, "learning_rate": 8e-05, "loss": 1.3136630058288574, "step": 1062 }, { "epoch": 0.2530697190426639, "grad_norm": 0.462890625, "learning_rate": 8e-05, "loss": 1.35286545753479, "step": 1064 }, { "epoch": 0.2535454140032704, "grad_norm": 0.462890625, "learning_rate": 8e-05, "loss": 1.319314956665039, "step": 1066 }, { "epoch": 0.2540211089638769, "grad_norm": 0.455078125, "learning_rate": 8e-05, "loss": 1.3844151496887207, "step": 1068 }, { "epoch": 0.25449680392448343, "grad_norm": 0.421875, "learning_rate": 8e-05, "loss": 1.2962524890899658, "step": 1070 }, { "epoch": 0.25497249888508994, "grad_norm": 0.44921875, "learning_rate": 8e-05, "loss": 1.3922219276428223, "step": 1072 }, { "epoch": 0.25544819384569645, "grad_norm": 0.435546875, "learning_rate": 8e-05, "loss": 1.3131260871887207, "step": 1074 }, { "epoch": 0.25592388880630296, "grad_norm": 0.423828125, "learning_rate": 8e-05, "loss": 1.322244644165039, "step": 1076 }, { "epoch": 0.25639958376690947, "grad_norm": 0.462890625, "learning_rate": 8e-05, "loss": 1.3516499996185303, "step": 1078 }, { "epoch": 0.256875278727516, "grad_norm": 0.470703125, "learning_rate": 8e-05, "loss": 1.3630871772766113, "step": 1080 }, { "epoch": 0.2573509736881225, "grad_norm": 0.431640625, "learning_rate": 8e-05, "loss": 1.3094751834869385, "step": 1082 }, { "epoch": 0.257826668648729, "grad_norm": 0.484375, "learning_rate": 8e-05, "loss": 1.306774616241455, "step": 1084 }, { "epoch": 0.2583023636093355, "grad_norm": 0.443359375, "learning_rate": 8e-05, "loss": 1.3254430294036865, "step": 1086 }, { "epoch": 0.258778058569942, "grad_norm": 0.45703125, "learning_rate": 8e-05, "loss": 1.306220293045044, "step": 1088 }, { "epoch": 0.2592537535305485, "grad_norm": 0.431640625, "learning_rate": 8e-05, "loss": 1.337794303894043, "step": 1090 }, { "epoch": 0.25972944849115503, "grad_norm": 0.42578125, "learning_rate": 8e-05, "loss": 1.324033260345459, "step": 1092 }, { "epoch": 0.26020514345176154, "grad_norm": 0.46484375, "learning_rate": 8e-05, "loss": 1.3060619831085205, "step": 1094 }, { "epoch": 0.26068083841236805, "grad_norm": 0.46875, "learning_rate": 8e-05, "loss": 1.411613941192627, "step": 1096 }, { "epoch": 0.26115653337297456, "grad_norm": 0.451171875, "learning_rate": 8e-05, "loss": 1.3333206176757812, "step": 1098 }, { "epoch": 0.2616322283335811, "grad_norm": 0.494140625, "learning_rate": 8e-05, "loss": 1.3340492248535156, "step": 1100 }, { "epoch": 0.2621079232941876, "grad_norm": 0.5234375, "learning_rate": 8e-05, "loss": 1.3538923263549805, "step": 1102 }, { "epoch": 0.2625836182547941, "grad_norm": 0.443359375, "learning_rate": 8e-05, "loss": 1.2844221591949463, "step": 1104 }, { "epoch": 0.2630593132154006, "grad_norm": 0.41015625, "learning_rate": 8e-05, "loss": 1.2786577939987183, "step": 1106 }, { "epoch": 0.2635350081760071, "grad_norm": 0.408203125, "learning_rate": 8e-05, "loss": 1.3393871784210205, "step": 1108 }, { "epoch": 0.2640107031366136, "grad_norm": 0.451171875, "learning_rate": 8e-05, "loss": 1.3317300081253052, "step": 1110 }, { "epoch": 0.26448639809722013, "grad_norm": 0.470703125, "learning_rate": 8e-05, "loss": 1.329606056213379, "step": 1112 }, { "epoch": 0.26496209305782664, "grad_norm": 0.455078125, "learning_rate": 8e-05, "loss": 1.3412857055664062, "step": 1114 }, { "epoch": 0.2654377880184332, "grad_norm": 0.44140625, "learning_rate": 8e-05, "loss": 1.3141382932662964, "step": 1116 }, { "epoch": 0.2659134829790397, "grad_norm": 0.44921875, "learning_rate": 8e-05, "loss": 1.3066372871398926, "step": 1118 }, { "epoch": 0.2663891779396462, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.336663007736206, "step": 1120 }, { "epoch": 0.26686487290025274, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.3393672704696655, "step": 1122 }, { "epoch": 0.26734056786085925, "grad_norm": 0.4296875, "learning_rate": 8e-05, "loss": 1.338028907775879, "step": 1124 }, { "epoch": 0.26781626282146576, "grad_norm": 0.447265625, "learning_rate": 8e-05, "loss": 1.25938880443573, "step": 1126 }, { "epoch": 0.26829195778207227, "grad_norm": 0.5, "learning_rate": 8e-05, "loss": 1.3836978673934937, "step": 1128 }, { "epoch": 0.2687676527426788, "grad_norm": 0.45703125, "learning_rate": 8e-05, "loss": 1.3326656818389893, "step": 1130 }, { "epoch": 0.2692433477032853, "grad_norm": 0.470703125, "learning_rate": 8e-05, "loss": 1.2927348613739014, "step": 1132 }, { "epoch": 0.2697190426638918, "grad_norm": 0.52734375, "learning_rate": 8e-05, "loss": 1.4277849197387695, "step": 1134 }, { "epoch": 0.2701947376244983, "grad_norm": 0.498046875, "learning_rate": 8e-05, "loss": 1.3989144563674927, "step": 1136 }, { "epoch": 0.2706704325851048, "grad_norm": 0.45703125, "learning_rate": 8e-05, "loss": 1.3444643020629883, "step": 1138 }, { "epoch": 0.2711461275457113, "grad_norm": 0.42578125, "learning_rate": 8e-05, "loss": 1.2833266258239746, "step": 1140 }, { "epoch": 0.27162182250631783, "grad_norm": 0.455078125, "learning_rate": 8e-05, "loss": 1.358147382736206, "step": 1142 }, { "epoch": 0.27209751746692434, "grad_norm": 0.443359375, "learning_rate": 8e-05, "loss": 1.3105173110961914, "step": 1144 }, { "epoch": 0.27257321242753085, "grad_norm": 0.478515625, "learning_rate": 8e-05, "loss": 1.3114371299743652, "step": 1146 }, { "epoch": 0.27304890738813736, "grad_norm": 0.443359375, "learning_rate": 8e-05, "loss": 1.2814993858337402, "step": 1148 }, { "epoch": 0.2735246023487439, "grad_norm": 0.455078125, "learning_rate": 8e-05, "loss": 1.3203294277191162, "step": 1150 }, { "epoch": 0.2740002973093504, "grad_norm": 0.431640625, "learning_rate": 8e-05, "loss": 1.2979755401611328, "step": 1152 }, { "epoch": 0.2744759922699569, "grad_norm": 0.42578125, "learning_rate": 8e-05, "loss": 1.3081634044647217, "step": 1154 }, { "epoch": 0.2749516872305634, "grad_norm": 0.423828125, "learning_rate": 8e-05, "loss": 1.3281807899475098, "step": 1156 }, { "epoch": 0.2754273821911699, "grad_norm": 0.42578125, "learning_rate": 8e-05, "loss": 1.3202593326568604, "step": 1158 }, { "epoch": 0.2759030771517764, "grad_norm": 0.43359375, "learning_rate": 8e-05, "loss": 1.3806310892105103, "step": 1160 }, { "epoch": 0.27637877211238293, "grad_norm": 0.43359375, "learning_rate": 8e-05, "loss": 1.3173789978027344, "step": 1162 }, { "epoch": 0.27685446707298944, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.3223962783813477, "step": 1164 }, { "epoch": 0.27733016203359595, "grad_norm": 0.421875, "learning_rate": 8e-05, "loss": 1.3117542266845703, "step": 1166 }, { "epoch": 0.27780585699420246, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.3533828258514404, "step": 1168 }, { "epoch": 0.27828155195480897, "grad_norm": 0.427734375, "learning_rate": 8e-05, "loss": 1.3214187622070312, "step": 1170 }, { "epoch": 0.2787572469154155, "grad_norm": 0.458984375, "learning_rate": 8e-05, "loss": 1.3294625282287598, "step": 1172 }, { "epoch": 0.279232941876022, "grad_norm": 0.42578125, "learning_rate": 8e-05, "loss": 1.3171840906143188, "step": 1174 }, { "epoch": 0.2797086368366285, "grad_norm": 0.439453125, "learning_rate": 8e-05, "loss": 1.3028992414474487, "step": 1176 }, { "epoch": 0.280184331797235, "grad_norm": 0.47265625, "learning_rate": 8e-05, "loss": 1.3446723222732544, "step": 1178 }, { "epoch": 0.2806600267578415, "grad_norm": 0.462890625, "learning_rate": 8e-05, "loss": 1.296749472618103, "step": 1180 }, { "epoch": 0.28113572171844803, "grad_norm": 0.5078125, "learning_rate": 8e-05, "loss": 1.3563461303710938, "step": 1182 }, { "epoch": 0.28161141667905454, "grad_norm": 0.5, "learning_rate": 8e-05, "loss": 1.3181467056274414, "step": 1184 }, { "epoch": 0.28208711163966105, "grad_norm": 0.48046875, "learning_rate": 8e-05, "loss": 1.3786540031433105, "step": 1186 }, { "epoch": 0.28256280660026756, "grad_norm": 0.439453125, "learning_rate": 8e-05, "loss": 1.3328609466552734, "step": 1188 }, { "epoch": 0.28303850156087407, "grad_norm": 0.439453125, "learning_rate": 8e-05, "loss": 1.3462462425231934, "step": 1190 }, { "epoch": 0.2835141965214806, "grad_norm": 0.455078125, "learning_rate": 8e-05, "loss": 1.3369724750518799, "step": 1192 }, { "epoch": 0.2839898914820871, "grad_norm": 0.43359375, "learning_rate": 8e-05, "loss": 1.3191611766815186, "step": 1194 }, { "epoch": 0.2844655864426936, "grad_norm": 0.443359375, "learning_rate": 8e-05, "loss": 1.3197510242462158, "step": 1196 }, { "epoch": 0.2849412814033001, "grad_norm": 0.45703125, "learning_rate": 8e-05, "loss": 1.3486484289169312, "step": 1198 }, { "epoch": 0.28541697636390667, "grad_norm": 0.44140625, "learning_rate": 8e-05, "loss": 1.3362209796905518, "step": 1200 }, { "epoch": 0.2858926713245132, "grad_norm": 0.427734375, "learning_rate": 8e-05, "loss": 1.322070837020874, "step": 1202 }, { "epoch": 0.2863683662851197, "grad_norm": 0.431640625, "learning_rate": 8e-05, "loss": 1.3231661319732666, "step": 1204 }, { "epoch": 0.2868440612457262, "grad_norm": 0.421875, "learning_rate": 8e-05, "loss": 1.2671769857406616, "step": 1206 }, { "epoch": 0.2873197562063327, "grad_norm": 0.451171875, "learning_rate": 8e-05, "loss": 1.3193705081939697, "step": 1208 }, { "epoch": 0.2877954511669392, "grad_norm": 0.42578125, "learning_rate": 8e-05, "loss": 1.3454172611236572, "step": 1210 }, { "epoch": 0.28827114612754573, "grad_norm": 0.4375, "learning_rate": 8e-05, "loss": 1.351203441619873, "step": 1212 }, { "epoch": 0.28874684108815224, "grad_norm": 0.43359375, "learning_rate": 8e-05, "loss": 1.3364512920379639, "step": 1214 }, { "epoch": 0.28922253604875875, "grad_norm": 0.46875, "learning_rate": 8e-05, "loss": 1.3189178705215454, "step": 1216 }, { "epoch": 0.28969823100936526, "grad_norm": 0.48828125, "learning_rate": 8e-05, "loss": 1.2867472171783447, "step": 1218 }, { "epoch": 0.29017392596997177, "grad_norm": 0.46484375, "learning_rate": 8e-05, "loss": 1.3088388442993164, "step": 1220 }, { "epoch": 0.2906496209305783, "grad_norm": 0.431640625, "learning_rate": 8e-05, "loss": 1.2755553722381592, "step": 1222 }, { "epoch": 0.2911253158911848, "grad_norm": 0.44140625, "learning_rate": 8e-05, "loss": 1.3116247653961182, "step": 1224 }, { "epoch": 0.2916010108517913, "grad_norm": 0.4453125, "learning_rate": 8e-05, "loss": 1.2760411500930786, "step": 1226 }, { "epoch": 0.2920767058123978, "grad_norm": 0.435546875, "learning_rate": 8e-05, "loss": 1.3090481758117676, "step": 1228 }, { "epoch": 0.2925524007730043, "grad_norm": 0.439453125, "learning_rate": 8e-05, "loss": 1.3445281982421875, "step": 1230 }, { "epoch": 0.2930280957336108, "grad_norm": 0.427734375, "learning_rate": 8e-05, "loss": 1.3193836212158203, "step": 1232 }, { "epoch": 0.29350379069421734, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.20814847946167, "step": 1234 }, { "epoch": 0.29397948565482385, "grad_norm": 0.419921875, "learning_rate": 8e-05, "loss": 1.266977310180664, "step": 1236 }, { "epoch": 0.29445518061543036, "grad_norm": 0.486328125, "learning_rate": 8e-05, "loss": 1.3388676643371582, "step": 1238 }, { "epoch": 0.29493087557603687, "grad_norm": 0.46484375, "learning_rate": 8e-05, "loss": 1.3216158151626587, "step": 1240 }, { "epoch": 0.2954065705366434, "grad_norm": 0.4609375, "learning_rate": 8e-05, "loss": 1.3377256393432617, "step": 1242 }, { "epoch": 0.2958822654972499, "grad_norm": 0.45703125, "learning_rate": 8e-05, "loss": 1.3120627403259277, "step": 1244 }, { "epoch": 0.2963579604578564, "grad_norm": 0.482421875, "learning_rate": 8e-05, "loss": 1.38155198097229, "step": 1246 }, { "epoch": 0.2968336554184629, "grad_norm": 0.44921875, "learning_rate": 8e-05, "loss": 1.3631592988967896, "step": 1248 }, { "epoch": 0.2973093503790694, "grad_norm": 0.466796875, "learning_rate": 8e-05, "loss": 1.342321515083313, "step": 1250 }, { "epoch": 0.2977850453396759, "grad_norm": 0.478515625, "learning_rate": 8e-05, "loss": 1.391056776046753, "step": 1252 }, { "epoch": 0.29826074030028243, "grad_norm": 0.447265625, "learning_rate": 8e-05, "loss": 1.275477409362793, "step": 1254 }, { "epoch": 0.29873643526088894, "grad_norm": 0.462890625, "learning_rate": 8e-05, "loss": 1.3247549533843994, "step": 1256 }, { "epoch": 0.29921213022149545, "grad_norm": 0.439453125, "learning_rate": 8e-05, "loss": 1.3454852104187012, "step": 1258 }, { "epoch": 0.29968782518210196, "grad_norm": 0.46875, "learning_rate": 8e-05, "loss": 1.3124552965164185, "step": 1260 }, { "epoch": 0.3001635201427085, "grad_norm": 0.43359375, "learning_rate": 8e-05, "loss": 1.309496521949768, "step": 1262 }, { "epoch": 0.300639215103315, "grad_norm": 0.412109375, "learning_rate": 8e-05, "loss": 1.3047943115234375, "step": 1264 }, { "epoch": 0.3011149100639215, "grad_norm": 0.427734375, "learning_rate": 8e-05, "loss": 1.348783016204834, "step": 1266 }, { "epoch": 0.301590605024528, "grad_norm": 0.4453125, "learning_rate": 8e-05, "loss": 1.2833664417266846, "step": 1268 }, { "epoch": 0.3020662999851345, "grad_norm": 0.44921875, "learning_rate": 8e-05, "loss": 1.3276420831680298, "step": 1270 }, { "epoch": 0.302541994945741, "grad_norm": 0.427734375, "learning_rate": 8e-05, "loss": 1.3440744876861572, "step": 1272 }, { "epoch": 0.30301768990634753, "grad_norm": 0.421875, "learning_rate": 8e-05, "loss": 1.2924749851226807, "step": 1274 }, { "epoch": 0.30349338486695404, "grad_norm": 0.4140625, "learning_rate": 8e-05, "loss": 1.3171639442443848, "step": 1276 }, { "epoch": 0.30396907982756055, "grad_norm": 0.435546875, "learning_rate": 8e-05, "loss": 1.3555333614349365, "step": 1278 }, { "epoch": 0.30444477478816706, "grad_norm": 0.421875, "learning_rate": 8e-05, "loss": 1.2927653789520264, "step": 1280 }, { "epoch": 0.3049204697487736, "grad_norm": 0.4296875, "learning_rate": 8e-05, "loss": 1.34412682056427, "step": 1282 }, { "epoch": 0.30539616470938014, "grad_norm": 0.42578125, "learning_rate": 8e-05, "loss": 1.3178520202636719, "step": 1284 }, { "epoch": 0.30587185966998665, "grad_norm": 0.4296875, "learning_rate": 8e-05, "loss": 1.278599739074707, "step": 1286 }, { "epoch": 0.30634755463059316, "grad_norm": 0.404296875, "learning_rate": 8e-05, "loss": 1.254746437072754, "step": 1288 }, { "epoch": 0.30682324959119966, "grad_norm": 0.4296875, "learning_rate": 8e-05, "loss": 1.3239991664886475, "step": 1290 }, { "epoch": 0.3072989445518062, "grad_norm": 0.4140625, "learning_rate": 8e-05, "loss": 1.2205564975738525, "step": 1292 }, { "epoch": 0.3077746395124127, "grad_norm": 0.412109375, "learning_rate": 8e-05, "loss": 1.301189661026001, "step": 1294 }, { "epoch": 0.3082503344730192, "grad_norm": 0.404296875, "learning_rate": 8e-05, "loss": 1.3148789405822754, "step": 1296 }, { "epoch": 0.3087260294336257, "grad_norm": 0.412109375, "learning_rate": 8e-05, "loss": 1.3096203804016113, "step": 1298 }, { "epoch": 0.3092017243942322, "grad_norm": 0.41015625, "learning_rate": 8e-05, "loss": 1.3105592727661133, "step": 1300 }, { "epoch": 0.3096774193548387, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.312976360321045, "step": 1302 }, { "epoch": 0.31015311431544523, "grad_norm": 0.412109375, "learning_rate": 8e-05, "loss": 1.2987452745437622, "step": 1304 }, { "epoch": 0.31062880927605174, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.3321504592895508, "step": 1306 }, { "epoch": 0.31110450423665825, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.2754004001617432, "step": 1308 }, { "epoch": 0.31158019919726476, "grad_norm": 0.4375, "learning_rate": 8e-05, "loss": 1.3137989044189453, "step": 1310 }, { "epoch": 0.31205589415787127, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.311619520187378, "step": 1312 }, { "epoch": 0.3125315891184778, "grad_norm": 0.44140625, "learning_rate": 8e-05, "loss": 1.3559669256210327, "step": 1314 }, { "epoch": 0.3130072840790843, "grad_norm": 0.43359375, "learning_rate": 8e-05, "loss": 1.3143202066421509, "step": 1316 }, { "epoch": 0.3134829790396908, "grad_norm": 0.431640625, "learning_rate": 8e-05, "loss": 1.27866530418396, "step": 1318 }, { "epoch": 0.3139586740002973, "grad_norm": 0.43359375, "learning_rate": 8e-05, "loss": 1.3592901229858398, "step": 1320 }, { "epoch": 0.3144343689609038, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.2884161472320557, "step": 1322 }, { "epoch": 0.31491006392151033, "grad_norm": 0.423828125, "learning_rate": 8e-05, "loss": 1.3565433025360107, "step": 1324 }, { "epoch": 0.31538575888211684, "grad_norm": 0.4140625, "learning_rate": 8e-05, "loss": 1.237494945526123, "step": 1326 }, { "epoch": 0.31586145384272335, "grad_norm": 0.431640625, "learning_rate": 8e-05, "loss": 1.2647333145141602, "step": 1328 }, { "epoch": 0.31633714880332986, "grad_norm": 0.451171875, "learning_rate": 8e-05, "loss": 1.303452968597412, "step": 1330 }, { "epoch": 0.31681284376393637, "grad_norm": 0.4140625, "learning_rate": 8e-05, "loss": 1.297559142112732, "step": 1332 }, { "epoch": 0.3172885387245429, "grad_norm": 0.42578125, "learning_rate": 8e-05, "loss": 1.2743000984191895, "step": 1334 }, { "epoch": 0.3177642336851494, "grad_norm": 0.4453125, "learning_rate": 8e-05, "loss": 1.3135099411010742, "step": 1336 }, { "epoch": 0.3182399286457559, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.3139266967773438, "step": 1338 }, { "epoch": 0.3187156236063624, "grad_norm": 0.419921875, "learning_rate": 8e-05, "loss": 1.2548094987869263, "step": 1340 }, { "epoch": 0.3191913185669689, "grad_norm": 0.43359375, "learning_rate": 8e-05, "loss": 1.324897289276123, "step": 1342 }, { "epoch": 0.3196670135275754, "grad_norm": 0.427734375, "learning_rate": 8e-05, "loss": 1.256618618965149, "step": 1344 }, { "epoch": 0.32014270848818194, "grad_norm": 0.455078125, "learning_rate": 8e-05, "loss": 1.3173911571502686, "step": 1346 }, { "epoch": 0.32061840344878845, "grad_norm": 0.462890625, "learning_rate": 8e-05, "loss": 1.3058103322982788, "step": 1348 }, { "epoch": 0.32109409840939496, "grad_norm": 0.435546875, "learning_rate": 8e-05, "loss": 1.2872593402862549, "step": 1350 }, { "epoch": 0.32156979337000147, "grad_norm": 0.451171875, "learning_rate": 8e-05, "loss": 1.3403403759002686, "step": 1352 }, { "epoch": 0.322045488330608, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.320298433303833, "step": 1354 }, { "epoch": 0.3225211832912145, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2507272958755493, "step": 1356 }, { "epoch": 0.322996878251821, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.2136597633361816, "step": 1358 }, { "epoch": 0.3234725732124275, "grad_norm": 0.447265625, "learning_rate": 8e-05, "loss": 1.302070140838623, "step": 1360 }, { "epoch": 0.323948268173034, "grad_norm": 0.4140625, "learning_rate": 8e-05, "loss": 1.3372619152069092, "step": 1362 }, { "epoch": 0.3244239631336406, "grad_norm": 0.44140625, "learning_rate": 8e-05, "loss": 1.2919752597808838, "step": 1364 }, { "epoch": 0.3248996580942471, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.3035356998443604, "step": 1366 }, { "epoch": 0.3253753530548536, "grad_norm": 0.427734375, "learning_rate": 8e-05, "loss": 1.3196332454681396, "step": 1368 }, { "epoch": 0.3258510480154601, "grad_norm": 0.44921875, "learning_rate": 8e-05, "loss": 1.2436224222183228, "step": 1370 }, { "epoch": 0.3263267429760666, "grad_norm": 0.423828125, "learning_rate": 8e-05, "loss": 1.3428776264190674, "step": 1372 }, { "epoch": 0.32680243793667313, "grad_norm": 0.421875, "learning_rate": 8e-05, "loss": 1.2375438213348389, "step": 1374 }, { "epoch": 0.32727813289727964, "grad_norm": 0.4609375, "learning_rate": 8e-05, "loss": 1.2973229885101318, "step": 1376 }, { "epoch": 0.32775382785788615, "grad_norm": 0.5078125, "learning_rate": 8e-05, "loss": 1.327859878540039, "step": 1378 }, { "epoch": 0.32822952281849266, "grad_norm": 0.47265625, "learning_rate": 8e-05, "loss": 1.286755919456482, "step": 1380 }, { "epoch": 0.32870521777909917, "grad_norm": 0.462890625, "learning_rate": 8e-05, "loss": 1.2941248416900635, "step": 1382 }, { "epoch": 0.3291809127397057, "grad_norm": 0.4609375, "learning_rate": 8e-05, "loss": 1.304626226425171, "step": 1384 }, { "epoch": 0.3296566077003122, "grad_norm": 0.419921875, "learning_rate": 8e-05, "loss": 1.2754319906234741, "step": 1386 }, { "epoch": 0.3301323026609187, "grad_norm": 0.431640625, "learning_rate": 8e-05, "loss": 1.2561947107315063, "step": 1388 }, { "epoch": 0.3306079976215252, "grad_norm": 0.431640625, "learning_rate": 8e-05, "loss": 1.2895267009735107, "step": 1390 }, { "epoch": 0.3310836925821317, "grad_norm": 0.474609375, "learning_rate": 8e-05, "loss": 1.386023759841919, "step": 1392 }, { "epoch": 0.3315593875427382, "grad_norm": 0.466796875, "learning_rate": 8e-05, "loss": 1.2892621755599976, "step": 1394 }, { "epoch": 0.33203508250334474, "grad_norm": 0.466796875, "learning_rate": 8e-05, "loss": 1.2891567945480347, "step": 1396 }, { "epoch": 0.33251077746395125, "grad_norm": 0.5078125, "learning_rate": 8e-05, "loss": 1.322417140007019, "step": 1398 }, { "epoch": 0.33298647242455776, "grad_norm": 0.439453125, "learning_rate": 8e-05, "loss": 1.3004422187805176, "step": 1400 }, { "epoch": 0.33346216738516427, "grad_norm": 0.43359375, "learning_rate": 8e-05, "loss": 1.2988290786743164, "step": 1402 }, { "epoch": 0.3339378623457708, "grad_norm": 0.43359375, "learning_rate": 8e-05, "loss": 1.3143279552459717, "step": 1404 }, { "epoch": 0.3344135573063773, "grad_norm": 0.43359375, "learning_rate": 8e-05, "loss": 1.281175136566162, "step": 1406 }, { "epoch": 0.3348892522669838, "grad_norm": 0.44921875, "learning_rate": 8e-05, "loss": 1.289182424545288, "step": 1408 }, { "epoch": 0.3353649472275903, "grad_norm": 0.435546875, "learning_rate": 8e-05, "loss": 1.2937148809432983, "step": 1410 }, { "epoch": 0.3358406421881968, "grad_norm": 0.427734375, "learning_rate": 8e-05, "loss": 1.2338749170303345, "step": 1412 }, { "epoch": 0.3363163371488033, "grad_norm": 0.42578125, "learning_rate": 8e-05, "loss": 1.2975019216537476, "step": 1414 }, { "epoch": 0.33679203210940983, "grad_norm": 0.427734375, "learning_rate": 8e-05, "loss": 1.2488545179367065, "step": 1416 }, { "epoch": 0.33726772707001634, "grad_norm": 0.4140625, "learning_rate": 8e-05, "loss": 1.289847493171692, "step": 1418 }, { "epoch": 0.33774342203062285, "grad_norm": 0.423828125, "learning_rate": 8e-05, "loss": 1.2936429977416992, "step": 1420 }, { "epoch": 0.33821911699122936, "grad_norm": 0.419921875, "learning_rate": 8e-05, "loss": 1.2761449813842773, "step": 1422 }, { "epoch": 0.3386948119518359, "grad_norm": 0.412109375, "learning_rate": 8e-05, "loss": 1.2870736122131348, "step": 1424 }, { "epoch": 0.3391705069124424, "grad_norm": 0.408203125, "learning_rate": 8e-05, "loss": 1.33278489112854, "step": 1426 }, { "epoch": 0.3396462018730489, "grad_norm": 0.40625, "learning_rate": 8e-05, "loss": 1.2804453372955322, "step": 1428 }, { "epoch": 0.3401218968336554, "grad_norm": 0.443359375, "learning_rate": 8e-05, "loss": 1.303015112876892, "step": 1430 }, { "epoch": 0.3405975917942619, "grad_norm": 0.419921875, "learning_rate": 8e-05, "loss": 1.333322286605835, "step": 1432 }, { "epoch": 0.3410732867548684, "grad_norm": 0.447265625, "learning_rate": 8e-05, "loss": 1.320211410522461, "step": 1434 }, { "epoch": 0.34154898171547493, "grad_norm": 0.423828125, "learning_rate": 8e-05, "loss": 1.3331108093261719, "step": 1436 }, { "epoch": 0.34202467667608144, "grad_norm": 0.435546875, "learning_rate": 8e-05, "loss": 1.26707923412323, "step": 1438 }, { "epoch": 0.34250037163668795, "grad_norm": 0.439453125, "learning_rate": 8e-05, "loss": 1.3269259929656982, "step": 1440 }, { "epoch": 0.34297606659729446, "grad_norm": 0.44921875, "learning_rate": 8e-05, "loss": 1.3051103353500366, "step": 1442 }, { "epoch": 0.34345176155790097, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.307328701019287, "step": 1444 }, { "epoch": 0.3439274565185075, "grad_norm": 0.6171875, "learning_rate": 8e-05, "loss": 1.3046774864196777, "step": 1446 }, { "epoch": 0.34440315147911404, "grad_norm": 0.412109375, "learning_rate": 8e-05, "loss": 1.3187592029571533, "step": 1448 }, { "epoch": 0.34487884643972055, "grad_norm": 0.421875, "learning_rate": 8e-05, "loss": 1.289937973022461, "step": 1450 }, { "epoch": 0.34535454140032706, "grad_norm": 0.42578125, "learning_rate": 8e-05, "loss": 1.300656795501709, "step": 1452 }, { "epoch": 0.3458302363609336, "grad_norm": 0.423828125, "learning_rate": 8e-05, "loss": 1.328467607498169, "step": 1454 }, { "epoch": 0.3463059313215401, "grad_norm": 0.44140625, "learning_rate": 8e-05, "loss": 1.3533457517623901, "step": 1456 }, { "epoch": 0.3467816262821466, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.2739849090576172, "step": 1458 }, { "epoch": 0.3472573212427531, "grad_norm": 0.419921875, "learning_rate": 8e-05, "loss": 1.3364741802215576, "step": 1460 }, { "epoch": 0.3477330162033596, "grad_norm": 0.427734375, "learning_rate": 8e-05, "loss": 1.2574357986450195, "step": 1462 }, { "epoch": 0.3482087111639661, "grad_norm": 0.427734375, "learning_rate": 8e-05, "loss": 1.3151860237121582, "step": 1464 }, { "epoch": 0.34868440612457263, "grad_norm": 0.431640625, "learning_rate": 8e-05, "loss": 1.3361979722976685, "step": 1466 }, { "epoch": 0.34916010108517914, "grad_norm": 0.4296875, "learning_rate": 8e-05, "loss": 1.3134095668792725, "step": 1468 }, { "epoch": 0.34963579604578565, "grad_norm": 0.419921875, "learning_rate": 8e-05, "loss": 1.2602635622024536, "step": 1470 }, { "epoch": 0.35011149100639216, "grad_norm": 0.423828125, "learning_rate": 8e-05, "loss": 1.2942792177200317, "step": 1472 }, { "epoch": 0.35058718596699867, "grad_norm": 0.4453125, "learning_rate": 8e-05, "loss": 1.3020391464233398, "step": 1474 }, { "epoch": 0.3510628809276052, "grad_norm": 0.443359375, "learning_rate": 8e-05, "loss": 1.2818697690963745, "step": 1476 }, { "epoch": 0.3515385758882117, "grad_norm": 0.484375, "learning_rate": 8e-05, "loss": 1.357285976409912, "step": 1478 }, { "epoch": 0.3520142708488182, "grad_norm": 0.4453125, "learning_rate": 8e-05, "loss": 1.256792664527893, "step": 1480 }, { "epoch": 0.3524899658094247, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.2922112941741943, "step": 1482 }, { "epoch": 0.3529656607700312, "grad_norm": 0.408203125, "learning_rate": 8e-05, "loss": 1.313403606414795, "step": 1484 }, { "epoch": 0.35344135573063773, "grad_norm": 0.41015625, "learning_rate": 8e-05, "loss": 1.3349361419677734, "step": 1486 }, { "epoch": 0.35391705069124424, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.2362300157546997, "step": 1488 }, { "epoch": 0.35439274565185075, "grad_norm": 0.423828125, "learning_rate": 8e-05, "loss": 1.2810988426208496, "step": 1490 }, { "epoch": 0.35486844061245726, "grad_norm": 0.4765625, "learning_rate": 8e-05, "loss": 1.3440229892730713, "step": 1492 }, { "epoch": 0.35534413557306377, "grad_norm": 0.4375, "learning_rate": 8e-05, "loss": 1.3103101253509521, "step": 1494 }, { "epoch": 0.3558198305336703, "grad_norm": 0.447265625, "learning_rate": 8e-05, "loss": 1.2558884620666504, "step": 1496 }, { "epoch": 0.3562955254942768, "grad_norm": 0.447265625, "learning_rate": 8e-05, "loss": 1.3156042098999023, "step": 1498 }, { "epoch": 0.3567712204548833, "grad_norm": 0.46484375, "learning_rate": 8e-05, "loss": 1.294625997543335, "step": 1500 }, { "epoch": 0.3572469154154898, "grad_norm": 0.4140625, "learning_rate": 8e-05, "loss": 1.2535991668701172, "step": 1502 }, { "epoch": 0.3577226103760963, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.281435489654541, "step": 1504 }, { "epoch": 0.3581983053367028, "grad_norm": 0.4375, "learning_rate": 8e-05, "loss": 1.3105072975158691, "step": 1506 }, { "epoch": 0.35867400029730934, "grad_norm": 0.40625, "learning_rate": 8e-05, "loss": 1.3389477729797363, "step": 1508 }, { "epoch": 0.35914969525791585, "grad_norm": 0.41015625, "learning_rate": 8e-05, "loss": 1.3460373878479004, "step": 1510 }, { "epoch": 0.35962539021852236, "grad_norm": 0.42578125, "learning_rate": 8e-05, "loss": 1.2376234531402588, "step": 1512 }, { "epoch": 0.36010108517912887, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.3217864036560059, "step": 1514 }, { "epoch": 0.3605767801397354, "grad_norm": 0.40625, "learning_rate": 8e-05, "loss": 1.2773277759552002, "step": 1516 }, { "epoch": 0.3610524751003419, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.2766404151916504, "step": 1518 }, { "epoch": 0.3615281700609484, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.2760717868804932, "step": 1520 }, { "epoch": 0.3620038650215549, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.3625264167785645, "step": 1522 }, { "epoch": 0.3624795599821614, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.247987985610962, "step": 1524 }, { "epoch": 0.3629552549427679, "grad_norm": 0.40625, "learning_rate": 8e-05, "loss": 1.2980639934539795, "step": 1526 }, { "epoch": 0.36343094990337443, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.2997593879699707, "step": 1528 }, { "epoch": 0.363906644863981, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.2603178024291992, "step": 1530 }, { "epoch": 0.3643823398245875, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.302708625793457, "step": 1532 }, { "epoch": 0.364858034785194, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.2671213150024414, "step": 1534 }, { "epoch": 0.36533372974580053, "grad_norm": 0.4140625, "learning_rate": 8e-05, "loss": 1.3167269229888916, "step": 1536 }, { "epoch": 0.36580942470640704, "grad_norm": 0.443359375, "learning_rate": 8e-05, "loss": 1.3100299835205078, "step": 1538 }, { "epoch": 0.36628511966701355, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2652174234390259, "step": 1540 }, { "epoch": 0.36676081462762006, "grad_norm": 0.408203125, "learning_rate": 8e-05, "loss": 1.3316433429718018, "step": 1542 }, { "epoch": 0.36723650958822657, "grad_norm": 0.421875, "learning_rate": 8e-05, "loss": 1.2940750122070312, "step": 1544 }, { "epoch": 0.3677122045488331, "grad_norm": 0.427734375, "learning_rate": 8e-05, "loss": 1.3008698225021362, "step": 1546 }, { "epoch": 0.3681878995094396, "grad_norm": 0.43359375, "learning_rate": 8e-05, "loss": 1.2859610319137573, "step": 1548 }, { "epoch": 0.3686635944700461, "grad_norm": 0.4609375, "learning_rate": 8e-05, "loss": 1.2531521320343018, "step": 1550 }, { "epoch": 0.3691392894306526, "grad_norm": 0.431640625, "learning_rate": 8e-05, "loss": 1.2496728897094727, "step": 1552 }, { "epoch": 0.3696149843912591, "grad_norm": 0.462890625, "learning_rate": 8e-05, "loss": 1.33748459815979, "step": 1554 }, { "epoch": 0.3700906793518656, "grad_norm": 0.47265625, "learning_rate": 8e-05, "loss": 1.2866451740264893, "step": 1556 }, { "epoch": 0.37056637431247214, "grad_norm": 0.46484375, "learning_rate": 8e-05, "loss": 1.34792160987854, "step": 1558 }, { "epoch": 0.37104206927307865, "grad_norm": 0.52734375, "learning_rate": 8e-05, "loss": 1.2783215045928955, "step": 1560 }, { "epoch": 0.37151776423368515, "grad_norm": 0.50390625, "learning_rate": 8e-05, "loss": 1.2765138149261475, "step": 1562 }, { "epoch": 0.37199345919429166, "grad_norm": 0.458984375, "learning_rate": 8e-05, "loss": 1.3496522903442383, "step": 1564 }, { "epoch": 0.3724691541548982, "grad_norm": 0.435546875, "learning_rate": 8e-05, "loss": 1.3075356483459473, "step": 1566 }, { "epoch": 0.3729448491155047, "grad_norm": 0.494140625, "learning_rate": 8e-05, "loss": 1.2998372316360474, "step": 1568 }, { "epoch": 0.3734205440761112, "grad_norm": 0.45703125, "learning_rate": 8e-05, "loss": 1.2793023586273193, "step": 1570 }, { "epoch": 0.3738962390367177, "grad_norm": 0.4375, "learning_rate": 8e-05, "loss": 1.2992515563964844, "step": 1572 }, { "epoch": 0.3743719339973242, "grad_norm": 0.423828125, "learning_rate": 8e-05, "loss": 1.2910690307617188, "step": 1574 }, { "epoch": 0.3748476289579307, "grad_norm": 0.455078125, "learning_rate": 8e-05, "loss": 1.2138452529907227, "step": 1576 }, { "epoch": 0.37532332391853723, "grad_norm": 0.45703125, "learning_rate": 8e-05, "loss": 1.2589681148529053, "step": 1578 }, { "epoch": 0.37579901887914374, "grad_norm": 0.431640625, "learning_rate": 8e-05, "loss": 1.3131399154663086, "step": 1580 }, { "epoch": 0.37627471383975025, "grad_norm": 0.43359375, "learning_rate": 8e-05, "loss": 1.2620333433151245, "step": 1582 }, { "epoch": 0.37675040880035676, "grad_norm": 0.4453125, "learning_rate": 8e-05, "loss": 1.2692234516143799, "step": 1584 }, { "epoch": 0.37722610376096327, "grad_norm": 0.45703125, "learning_rate": 8e-05, "loss": 1.3276705741882324, "step": 1586 }, { "epoch": 0.3777017987215698, "grad_norm": 0.408203125, "learning_rate": 8e-05, "loss": 1.2581058740615845, "step": 1588 }, { "epoch": 0.3781774936821763, "grad_norm": 0.431640625, "learning_rate": 8e-05, "loss": 1.3049333095550537, "step": 1590 }, { "epoch": 0.3786531886427828, "grad_norm": 0.439453125, "learning_rate": 8e-05, "loss": 1.2860021591186523, "step": 1592 }, { "epoch": 0.3791288836033893, "grad_norm": 0.408203125, "learning_rate": 8e-05, "loss": 1.3235461711883545, "step": 1594 }, { "epoch": 0.3796045785639958, "grad_norm": 0.412109375, "learning_rate": 8e-05, "loss": 1.2726843357086182, "step": 1596 }, { "epoch": 0.38008027352460233, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.2600030899047852, "step": 1598 }, { "epoch": 0.38055596848520884, "grad_norm": 0.427734375, "learning_rate": 8e-05, "loss": 1.3035047054290771, "step": 1600 }, { "epoch": 0.38103166344581535, "grad_norm": 0.43359375, "learning_rate": 8e-05, "loss": 1.2810437679290771, "step": 1602 }, { "epoch": 0.38150735840642186, "grad_norm": 0.455078125, "learning_rate": 8e-05, "loss": 1.3053010702133179, "step": 1604 }, { "epoch": 0.38198305336702837, "grad_norm": 0.4375, "learning_rate": 8e-05, "loss": 1.285233497619629, "step": 1606 }, { "epoch": 0.3824587483276349, "grad_norm": 0.474609375, "learning_rate": 8e-05, "loss": 1.328747034072876, "step": 1608 }, { "epoch": 0.3829344432882414, "grad_norm": 0.4140625, "learning_rate": 8e-05, "loss": 1.3264154195785522, "step": 1610 }, { "epoch": 0.38341013824884795, "grad_norm": 0.40625, "learning_rate": 8e-05, "loss": 1.2244741916656494, "step": 1612 }, { "epoch": 0.38388583320945446, "grad_norm": 0.40625, "learning_rate": 8e-05, "loss": 1.247675895690918, "step": 1614 }, { "epoch": 0.384361528170061, "grad_norm": 0.419921875, "learning_rate": 8e-05, "loss": 1.289712905883789, "step": 1616 }, { "epoch": 0.3848372231306675, "grad_norm": 0.42578125, "learning_rate": 8e-05, "loss": 1.299727439880371, "step": 1618 }, { "epoch": 0.385312918091274, "grad_norm": 0.404296875, "learning_rate": 8e-05, "loss": 1.229569911956787, "step": 1620 }, { "epoch": 0.3857886130518805, "grad_norm": 0.4296875, "learning_rate": 8e-05, "loss": 1.322393774986267, "step": 1622 }, { "epoch": 0.386264308012487, "grad_norm": 0.404296875, "learning_rate": 8e-05, "loss": 1.2661751508712769, "step": 1624 }, { "epoch": 0.3867400029730935, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.2912821769714355, "step": 1626 }, { "epoch": 0.38721569793370003, "grad_norm": 0.40625, "learning_rate": 8e-05, "loss": 1.2871348857879639, "step": 1628 }, { "epoch": 0.38769139289430654, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2939109802246094, "step": 1630 }, { "epoch": 0.38816708785491305, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2797859907150269, "step": 1632 }, { "epoch": 0.38864278281551956, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2935690879821777, "step": 1634 }, { "epoch": 0.38911847777612607, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.2815215587615967, "step": 1636 }, { "epoch": 0.3895941727367326, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.2466614246368408, "step": 1638 }, { "epoch": 0.3900698676973391, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.3364429473876953, "step": 1640 }, { "epoch": 0.3905455626579456, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2810378074645996, "step": 1642 }, { "epoch": 0.3910212576185521, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.2879207134246826, "step": 1644 }, { "epoch": 0.3914969525791586, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.258134365081787, "step": 1646 }, { "epoch": 0.39197264753976513, "grad_norm": 0.4140625, "learning_rate": 8e-05, "loss": 1.2918128967285156, "step": 1648 }, { "epoch": 0.39244834250037164, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.2425835132598877, "step": 1650 }, { "epoch": 0.39292403746097815, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.213120698928833, "step": 1652 }, { "epoch": 0.39339973242158466, "grad_norm": 0.419921875, "learning_rate": 8e-05, "loss": 1.2344098091125488, "step": 1654 }, { "epoch": 0.39387542738219117, "grad_norm": 0.40625, "learning_rate": 8e-05, "loss": 1.2660351991653442, "step": 1656 }, { "epoch": 0.3943511223427977, "grad_norm": 0.421875, "learning_rate": 8e-05, "loss": 1.330662488937378, "step": 1658 }, { "epoch": 0.3948268173034042, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.2738041877746582, "step": 1660 }, { "epoch": 0.3953025122640107, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.2888221740722656, "step": 1662 }, { "epoch": 0.3957782072246172, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.288313865661621, "step": 1664 }, { "epoch": 0.3962539021852237, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.290661096572876, "step": 1666 }, { "epoch": 0.3967295971458302, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.249962568283081, "step": 1668 }, { "epoch": 0.39720529210643674, "grad_norm": 0.37890625, "learning_rate": 8e-05, "loss": 1.2480006217956543, "step": 1670 }, { "epoch": 0.39768098706704325, "grad_norm": 0.36328125, "learning_rate": 8e-05, "loss": 1.2179031372070312, "step": 1672 }, { "epoch": 0.39815668202764976, "grad_norm": 0.388671875, "learning_rate": 8e-05, "loss": 1.3344948291778564, "step": 1674 }, { "epoch": 0.39863237698825627, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.2962273359298706, "step": 1676 }, { "epoch": 0.3991080719488628, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.266413688659668, "step": 1678 }, { "epoch": 0.3995837669094693, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.2850923538208008, "step": 1680 }, { "epoch": 0.4000594618700758, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.2746737003326416, "step": 1682 }, { "epoch": 0.4005351568306823, "grad_norm": 0.412109375, "learning_rate": 8e-05, "loss": 1.2807002067565918, "step": 1684 }, { "epoch": 0.4010108517912888, "grad_norm": 0.4140625, "learning_rate": 8e-05, "loss": 1.2683579921722412, "step": 1686 }, { "epoch": 0.4014865467518953, "grad_norm": 0.4140625, "learning_rate": 8e-05, "loss": 1.2930378913879395, "step": 1688 }, { "epoch": 0.40196224171250183, "grad_norm": 0.408203125, "learning_rate": 8e-05, "loss": 1.2729506492614746, "step": 1690 }, { "epoch": 0.40243793667310834, "grad_norm": 0.443359375, "learning_rate": 8e-05, "loss": 1.2865461111068726, "step": 1692 }, { "epoch": 0.40291363163371485, "grad_norm": 0.421875, "learning_rate": 8e-05, "loss": 1.249645709991455, "step": 1694 }, { "epoch": 0.4033893265943214, "grad_norm": 0.3671875, "learning_rate": 8e-05, "loss": 1.2383712530136108, "step": 1696 }, { "epoch": 0.4038650215549279, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.246313452720642, "step": 1698 }, { "epoch": 0.40434071651553444, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.2288974523544312, "step": 1700 }, { "epoch": 0.40481641147614095, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.2907836437225342, "step": 1702 }, { "epoch": 0.40529210643674746, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.2730671167373657, "step": 1704 }, { "epoch": 0.40576780139735397, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.3130565881729126, "step": 1706 }, { "epoch": 0.4062434963579605, "grad_norm": 0.404296875, "learning_rate": 8e-05, "loss": 1.2681382894515991, "step": 1708 }, { "epoch": 0.406719191318567, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.296158790588379, "step": 1710 }, { "epoch": 0.4071948862791735, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.247192144393921, "step": 1712 }, { "epoch": 0.40767058123978, "grad_norm": 0.41015625, "learning_rate": 8e-05, "loss": 1.2639563083648682, "step": 1714 }, { "epoch": 0.4081462762003865, "grad_norm": 0.435546875, "learning_rate": 8e-05, "loss": 1.2656540870666504, "step": 1716 }, { "epoch": 0.408621971160993, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.205491542816162, "step": 1718 }, { "epoch": 0.40909766612159953, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.277217984199524, "step": 1720 }, { "epoch": 0.40957336108220604, "grad_norm": 0.443359375, "learning_rate": 8e-05, "loss": 1.3079639673233032, "step": 1722 }, { "epoch": 0.41004905604281255, "grad_norm": 0.458984375, "learning_rate": 8e-05, "loss": 1.2672646045684814, "step": 1724 }, { "epoch": 0.41052475100341906, "grad_norm": 0.474609375, "learning_rate": 8e-05, "loss": 1.2784157991409302, "step": 1726 }, { "epoch": 0.4110004459640256, "grad_norm": 0.484375, "learning_rate": 8e-05, "loss": 1.2682194709777832, "step": 1728 }, { "epoch": 0.4114761409246321, "grad_norm": 0.423828125, "learning_rate": 8e-05, "loss": 1.2786941528320312, "step": 1730 }, { "epoch": 0.4119518358852386, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.281358003616333, "step": 1732 }, { "epoch": 0.4124275308458451, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.246321678161621, "step": 1734 }, { "epoch": 0.4129032258064516, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2259719371795654, "step": 1736 }, { "epoch": 0.4133789207670581, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.3222472667694092, "step": 1738 }, { "epoch": 0.41385461572766463, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.216452956199646, "step": 1740 }, { "epoch": 0.41433031068827114, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.3299109935760498, "step": 1742 }, { "epoch": 0.41480600564887765, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.250302791595459, "step": 1744 }, { "epoch": 0.41528170060948416, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.2744085788726807, "step": 1746 }, { "epoch": 0.41575739557009067, "grad_norm": 0.40625, "learning_rate": 8e-05, "loss": 1.2700858116149902, "step": 1748 }, { "epoch": 0.4162330905306972, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.2627267837524414, "step": 1750 }, { "epoch": 0.4167087854913037, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.269378423690796, "step": 1752 }, { "epoch": 0.4171844804519102, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.3320337533950806, "step": 1754 }, { "epoch": 0.4176601754125167, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2717125415802002, "step": 1756 }, { "epoch": 0.4181358703731232, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.2660107612609863, "step": 1758 }, { "epoch": 0.41861156533372973, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.2718021869659424, "step": 1760 }, { "epoch": 0.41908726029433624, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.2778680324554443, "step": 1762 }, { "epoch": 0.41956295525494275, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.247718334197998, "step": 1764 }, { "epoch": 0.42003865021554926, "grad_norm": 0.4140625, "learning_rate": 8e-05, "loss": 1.2594034671783447, "step": 1766 }, { "epoch": 0.42051434517615577, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.2796156406402588, "step": 1768 }, { "epoch": 0.4209900401367623, "grad_norm": 0.388671875, "learning_rate": 8e-05, "loss": 1.2971086502075195, "step": 1770 }, { "epoch": 0.4214657350973688, "grad_norm": 0.431640625, "learning_rate": 8e-05, "loss": 1.2875339984893799, "step": 1772 }, { "epoch": 0.4219414300579753, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.2802563905715942, "step": 1774 }, { "epoch": 0.4224171250185818, "grad_norm": 0.455078125, "learning_rate": 8e-05, "loss": 1.297211766242981, "step": 1776 }, { "epoch": 0.4228928199791884, "grad_norm": 0.4296875, "learning_rate": 8e-05, "loss": 1.1946825981140137, "step": 1778 }, { "epoch": 0.4233685149397949, "grad_norm": 0.421875, "learning_rate": 8e-05, "loss": 1.256436824798584, "step": 1780 }, { "epoch": 0.4238442099004014, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2787179946899414, "step": 1782 }, { "epoch": 0.4243199048610079, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.2537882328033447, "step": 1784 }, { "epoch": 0.4247955998216144, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2897108793258667, "step": 1786 }, { "epoch": 0.4252712947822209, "grad_norm": 0.40625, "learning_rate": 8e-05, "loss": 1.2501720190048218, "step": 1788 }, { "epoch": 0.42574698974282743, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.2753134965896606, "step": 1790 }, { "epoch": 0.42622268470343394, "grad_norm": 0.46875, "learning_rate": 8e-05, "loss": 1.2809417247772217, "step": 1792 }, { "epoch": 0.42669837966404045, "grad_norm": 0.51171875, "learning_rate": 8e-05, "loss": 1.2722747325897217, "step": 1794 }, { "epoch": 0.42717407462464696, "grad_norm": 0.435546875, "learning_rate": 8e-05, "loss": 1.294141411781311, "step": 1796 }, { "epoch": 0.42764976958525347, "grad_norm": 0.435546875, "learning_rate": 8e-05, "loss": 1.2701992988586426, "step": 1798 }, { "epoch": 0.42812546454586, "grad_norm": 0.44140625, "learning_rate": 8e-05, "loss": 1.2379639148712158, "step": 1800 }, { "epoch": 0.4286011595064665, "grad_norm": 0.443359375, "learning_rate": 8e-05, "loss": 1.262845516204834, "step": 1802 }, { "epoch": 0.429076854467073, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.213247299194336, "step": 1804 }, { "epoch": 0.4295525494276795, "grad_norm": 0.421875, "learning_rate": 8e-05, "loss": 1.270270824432373, "step": 1806 }, { "epoch": 0.430028244388286, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.1981269121170044, "step": 1808 }, { "epoch": 0.43050393934889253, "grad_norm": 0.4296875, "learning_rate": 8e-05, "loss": 1.2772140502929688, "step": 1810 }, { "epoch": 0.43097963430949904, "grad_norm": 0.4296875, "learning_rate": 8e-05, "loss": 1.2868304252624512, "step": 1812 }, { "epoch": 0.43145532927010555, "grad_norm": 0.41015625, "learning_rate": 8e-05, "loss": 1.2759490013122559, "step": 1814 }, { "epoch": 0.43193102423071206, "grad_norm": 0.4140625, "learning_rate": 8e-05, "loss": 1.292006015777588, "step": 1816 }, { "epoch": 0.43240671919131857, "grad_norm": 0.435546875, "learning_rate": 8e-05, "loss": 1.2290836572647095, "step": 1818 }, { "epoch": 0.4328824141519251, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.2799795866012573, "step": 1820 }, { "epoch": 0.4333581091125316, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.2700903415679932, "step": 1822 }, { "epoch": 0.4338338040731381, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.276972770690918, "step": 1824 }, { "epoch": 0.4343094990337446, "grad_norm": 0.376953125, "learning_rate": 8e-05, "loss": 1.2546138763427734, "step": 1826 }, { "epoch": 0.4347851939943511, "grad_norm": 0.42578125, "learning_rate": 8e-05, "loss": 1.2350144386291504, "step": 1828 }, { "epoch": 0.4352608889549576, "grad_norm": 0.404296875, "learning_rate": 8e-05, "loss": 1.255623459815979, "step": 1830 }, { "epoch": 0.43573658391556414, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.3046503067016602, "step": 1832 }, { "epoch": 0.43621227887617064, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.2888526916503906, "step": 1834 }, { "epoch": 0.43668797383677715, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.2555067539215088, "step": 1836 }, { "epoch": 0.43716366879738366, "grad_norm": 0.423828125, "learning_rate": 8e-05, "loss": 1.3039183616638184, "step": 1838 }, { "epoch": 0.4376393637579902, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.2199631929397583, "step": 1840 }, { "epoch": 0.4381150587185967, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.2994630336761475, "step": 1842 }, { "epoch": 0.4385907536792032, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.241573452949524, "step": 1844 }, { "epoch": 0.4390664486398097, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2570286989212036, "step": 1846 }, { "epoch": 0.4395421436004162, "grad_norm": 0.41015625, "learning_rate": 8e-05, "loss": 1.2594881057739258, "step": 1848 }, { "epoch": 0.4400178385610227, "grad_norm": 0.423828125, "learning_rate": 8e-05, "loss": 1.2928112745285034, "step": 1850 }, { "epoch": 0.44049353352162923, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.2726936340332031, "step": 1852 }, { "epoch": 0.44096922848223574, "grad_norm": 0.404296875, "learning_rate": 8e-05, "loss": 1.32316255569458, "step": 1854 }, { "epoch": 0.44144492344284225, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2438604831695557, "step": 1856 }, { "epoch": 0.44192061840344876, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.293677568435669, "step": 1858 }, { "epoch": 0.4423963133640553, "grad_norm": 0.408203125, "learning_rate": 8e-05, "loss": 1.3002068996429443, "step": 1860 }, { "epoch": 0.44287200832466184, "grad_norm": 0.45703125, "learning_rate": 8e-05, "loss": 1.294407844543457, "step": 1862 }, { "epoch": 0.44334770328526835, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.242573857307434, "step": 1864 }, { "epoch": 0.44382339824587486, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2922019958496094, "step": 1866 }, { "epoch": 0.44429909320648137, "grad_norm": 0.408203125, "learning_rate": 8e-05, "loss": 1.2213215827941895, "step": 1868 }, { "epoch": 0.4447747881670879, "grad_norm": 0.40625, "learning_rate": 8e-05, "loss": 1.2707006931304932, "step": 1870 }, { "epoch": 0.4452504831276944, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.2314999103546143, "step": 1872 }, { "epoch": 0.4457261780883009, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.3218889236450195, "step": 1874 }, { "epoch": 0.4462018730489074, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.2159051895141602, "step": 1876 }, { "epoch": 0.4466775680095139, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.3076913356781006, "step": 1878 }, { "epoch": 0.4471532629701204, "grad_norm": 0.37890625, "learning_rate": 8e-05, "loss": 1.2779921293258667, "step": 1880 }, { "epoch": 0.44762895793072693, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.2604446411132812, "step": 1882 }, { "epoch": 0.44810465289133344, "grad_norm": 0.376953125, "learning_rate": 8e-05, "loss": 1.2382320165634155, "step": 1884 }, { "epoch": 0.44858034785193995, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.3830101490020752, "step": 1886 }, { "epoch": 0.44905604281254646, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.228447675704956, "step": 1888 }, { "epoch": 0.449531737773153, "grad_norm": 0.43359375, "learning_rate": 8e-05, "loss": 1.2446924448013306, "step": 1890 }, { "epoch": 0.4500074327337595, "grad_norm": 0.455078125, "learning_rate": 8e-05, "loss": 1.2444430589675903, "step": 1892 }, { "epoch": 0.450483127694366, "grad_norm": 0.41015625, "learning_rate": 8e-05, "loss": 1.1985334157943726, "step": 1894 }, { "epoch": 0.4509588226549725, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.2600021362304688, "step": 1896 }, { "epoch": 0.451434517615579, "grad_norm": 0.4375, "learning_rate": 8e-05, "loss": 1.2708806991577148, "step": 1898 }, { "epoch": 0.4519102125761855, "grad_norm": 0.408203125, "learning_rate": 8e-05, "loss": 1.273937702178955, "step": 1900 }, { "epoch": 0.45238590753679203, "grad_norm": 0.41015625, "learning_rate": 8e-05, "loss": 1.2232747077941895, "step": 1902 }, { "epoch": 0.45286160249739854, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.2665836811065674, "step": 1904 }, { "epoch": 0.45333729745800505, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.2973511219024658, "step": 1906 }, { "epoch": 0.45381299241861156, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.239840030670166, "step": 1908 }, { "epoch": 0.45428868737921807, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2163124084472656, "step": 1910 }, { "epoch": 0.4547643823398246, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.245084285736084, "step": 1912 }, { "epoch": 0.4552400773004311, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.3053221702575684, "step": 1914 }, { "epoch": 0.4557157722610376, "grad_norm": 0.41015625, "learning_rate": 8e-05, "loss": 1.2788605690002441, "step": 1916 }, { "epoch": 0.4561914672216441, "grad_norm": 0.404296875, "learning_rate": 8e-05, "loss": 1.2649834156036377, "step": 1918 }, { "epoch": 0.4566671621822506, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.2921392917633057, "step": 1920 }, { "epoch": 0.45714285714285713, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.2440087795257568, "step": 1922 }, { "epoch": 0.45761855210346364, "grad_norm": 0.36328125, "learning_rate": 8e-05, "loss": 1.2340590953826904, "step": 1924 }, { "epoch": 0.45809424706407015, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.2697665691375732, "step": 1926 }, { "epoch": 0.45856994202467666, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.2789232730865479, "step": 1928 }, { "epoch": 0.45904563698528317, "grad_norm": 0.4375, "learning_rate": 8e-05, "loss": 1.262975811958313, "step": 1930 }, { "epoch": 0.4595213319458897, "grad_norm": 0.404296875, "learning_rate": 8e-05, "loss": 1.286508321762085, "step": 1932 }, { "epoch": 0.4599970269064962, "grad_norm": 0.41015625, "learning_rate": 8e-05, "loss": 1.2848749160766602, "step": 1934 }, { "epoch": 0.4604727218671027, "grad_norm": 0.4296875, "learning_rate": 8e-05, "loss": 1.253462314605713, "step": 1936 }, { "epoch": 0.4609484168277092, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.2408068180084229, "step": 1938 }, { "epoch": 0.4614241117883157, "grad_norm": 0.427734375, "learning_rate": 8e-05, "loss": 1.2684673070907593, "step": 1940 }, { "epoch": 0.4618998067489222, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.2076165676116943, "step": 1942 }, { "epoch": 0.4623755017095288, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.2525750398635864, "step": 1944 }, { "epoch": 0.4628511966701353, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.2482715845108032, "step": 1946 }, { "epoch": 0.4633268916307418, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.2954301834106445, "step": 1948 }, { "epoch": 0.4638025865913483, "grad_norm": 0.4375, "learning_rate": 8e-05, "loss": 1.2526676654815674, "step": 1950 }, { "epoch": 0.46427828155195483, "grad_norm": 0.408203125, "learning_rate": 8e-05, "loss": 1.270956039428711, "step": 1952 }, { "epoch": 0.46475397651256134, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.2305779457092285, "step": 1954 }, { "epoch": 0.46522967147316785, "grad_norm": 0.41015625, "learning_rate": 8e-05, "loss": 1.3026628494262695, "step": 1956 }, { "epoch": 0.46570536643377436, "grad_norm": 0.42578125, "learning_rate": 8e-05, "loss": 1.276360273361206, "step": 1958 }, { "epoch": 0.46618106139438087, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.2271491289138794, "step": 1960 }, { "epoch": 0.4666567563549874, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.27445650100708, "step": 1962 }, { "epoch": 0.4671324513155939, "grad_norm": 0.40625, "learning_rate": 8e-05, "loss": 1.2661042213439941, "step": 1964 }, { "epoch": 0.4676081462762004, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.2653608322143555, "step": 1966 }, { "epoch": 0.4680838412368069, "grad_norm": 0.388671875, "learning_rate": 8e-05, "loss": 1.2979223728179932, "step": 1968 }, { "epoch": 0.4685595361974134, "grad_norm": 0.376953125, "learning_rate": 8e-05, "loss": 1.2312049865722656, "step": 1970 }, { "epoch": 0.4690352311580199, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.3057024478912354, "step": 1972 }, { "epoch": 0.46951092611862644, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.2951257228851318, "step": 1974 }, { "epoch": 0.46998662107923295, "grad_norm": 0.404296875, "learning_rate": 8e-05, "loss": 1.2718441486358643, "step": 1976 }, { "epoch": 0.47046231603983946, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.2305831909179688, "step": 1978 }, { "epoch": 0.47093801100044597, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.2023491859436035, "step": 1980 }, { "epoch": 0.4714137059610525, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.219580888748169, "step": 1982 }, { "epoch": 0.471889400921659, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.247983455657959, "step": 1984 }, { "epoch": 0.4723650958822655, "grad_norm": 0.41015625, "learning_rate": 8e-05, "loss": 1.2529908418655396, "step": 1986 }, { "epoch": 0.472840790842872, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.2776696681976318, "step": 1988 }, { "epoch": 0.4733164858034785, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.249483585357666, "step": 1990 }, { "epoch": 0.473792180764085, "grad_norm": 0.412109375, "learning_rate": 8e-05, "loss": 1.3180161714553833, "step": 1992 }, { "epoch": 0.47426787572469153, "grad_norm": 0.36328125, "learning_rate": 8e-05, "loss": 1.2910526990890503, "step": 1994 }, { "epoch": 0.47474357068529804, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.2458434104919434, "step": 1996 }, { "epoch": 0.47521926564590455, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.3033870458602905, "step": 1998 }, { "epoch": 0.47569496060651106, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2500510215759277, "step": 2000 }, { "epoch": 0.4761706555671176, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2625623941421509, "step": 2002 }, { "epoch": 0.4766463505277241, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.2688162326812744, "step": 2004 }, { "epoch": 0.4771220454883306, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.2744543552398682, "step": 2006 }, { "epoch": 0.4775977404489371, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.28806471824646, "step": 2008 }, { "epoch": 0.4780734354095436, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.2513093948364258, "step": 2010 }, { "epoch": 0.4785491303701501, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2578754425048828, "step": 2012 }, { "epoch": 0.47902482533075663, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2690682411193848, "step": 2014 }, { "epoch": 0.47950052029136314, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.244710922241211, "step": 2016 }, { "epoch": 0.47997621525196965, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2703763246536255, "step": 2018 }, { "epoch": 0.48045191021257616, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.289364218711853, "step": 2020 }, { "epoch": 0.48092760517318267, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.2618646621704102, "step": 2022 }, { "epoch": 0.4814033001337892, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.2179932594299316, "step": 2024 }, { "epoch": 0.48187899509439575, "grad_norm": 0.41015625, "learning_rate": 8e-05, "loss": 1.2760300636291504, "step": 2026 }, { "epoch": 0.48235469005500226, "grad_norm": 0.40625, "learning_rate": 8e-05, "loss": 1.2909011840820312, "step": 2028 }, { "epoch": 0.48283038501560877, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.2532756328582764, "step": 2030 }, { "epoch": 0.4833060799762153, "grad_norm": 0.37890625, "learning_rate": 8e-05, "loss": 1.2610164880752563, "step": 2032 }, { "epoch": 0.4837817749368218, "grad_norm": 0.376953125, "learning_rate": 8e-05, "loss": 1.2327613830566406, "step": 2034 }, { "epoch": 0.4842574698974283, "grad_norm": 0.40625, "learning_rate": 8e-05, "loss": 1.2339394092559814, "step": 2036 }, { "epoch": 0.4847331648580348, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.2110052108764648, "step": 2038 }, { "epoch": 0.4852088598186413, "grad_norm": 0.3671875, "learning_rate": 8e-05, "loss": 1.2191238403320312, "step": 2040 }, { "epoch": 0.4856845547792478, "grad_norm": 0.37109375, "learning_rate": 8e-05, "loss": 1.2192617654800415, "step": 2042 }, { "epoch": 0.48616024973985433, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2636759281158447, "step": 2044 }, { "epoch": 0.48663594470046084, "grad_norm": 0.41015625, "learning_rate": 8e-05, "loss": 1.207831621170044, "step": 2046 }, { "epoch": 0.48711163966106735, "grad_norm": 0.419921875, "learning_rate": 8e-05, "loss": 1.1997129917144775, "step": 2048 }, { "epoch": 0.48758733462167386, "grad_norm": 0.455078125, "learning_rate": 8e-05, "loss": 1.2564668655395508, "step": 2050 }, { "epoch": 0.48806302958228037, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.2735317945480347, "step": 2052 }, { "epoch": 0.4885387245428869, "grad_norm": 0.427734375, "learning_rate": 8e-05, "loss": 1.2732312679290771, "step": 2054 }, { "epoch": 0.4890144195034934, "grad_norm": 0.408203125, "learning_rate": 8e-05, "loss": 1.2548601627349854, "step": 2056 }, { "epoch": 0.4894901144640999, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2576682567596436, "step": 2058 }, { "epoch": 0.4899658094247064, "grad_norm": 0.373046875, "learning_rate": 8e-05, "loss": 1.2264012098312378, "step": 2060 }, { "epoch": 0.4904415043853129, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.2854325771331787, "step": 2062 }, { "epoch": 0.49091719934591943, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.3236126899719238, "step": 2064 }, { "epoch": 0.49139289430652594, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.2489556074142456, "step": 2066 }, { "epoch": 0.49186858926713245, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.2574775218963623, "step": 2068 }, { "epoch": 0.49234428422773896, "grad_norm": 0.41015625, "learning_rate": 8e-05, "loss": 1.2095019817352295, "step": 2070 }, { "epoch": 0.49281997918834547, "grad_norm": 0.36328125, "learning_rate": 8e-05, "loss": 1.2634193897247314, "step": 2072 }, { "epoch": 0.493295674148952, "grad_norm": 0.404296875, "learning_rate": 8e-05, "loss": 1.2667688131332397, "step": 2074 }, { "epoch": 0.4937713691095585, "grad_norm": 0.37890625, "learning_rate": 8e-05, "loss": 1.2418756484985352, "step": 2076 }, { "epoch": 0.494247064070165, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.2592451572418213, "step": 2078 }, { "epoch": 0.4947227590307715, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.2803057432174683, "step": 2080 }, { "epoch": 0.495198453991378, "grad_norm": 0.44921875, "learning_rate": 8e-05, "loss": 1.2541866302490234, "step": 2082 }, { "epoch": 0.49567414895198453, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.2359161376953125, "step": 2084 }, { "epoch": 0.49614984391259104, "grad_norm": 0.41015625, "learning_rate": 8e-05, "loss": 1.2950963973999023, "step": 2086 }, { "epoch": 0.49662553887319755, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.283148169517517, "step": 2088 }, { "epoch": 0.49710123383380406, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.2806365489959717, "step": 2090 }, { "epoch": 0.49757692879441057, "grad_norm": 0.404296875, "learning_rate": 8e-05, "loss": 1.21940279006958, "step": 2092 }, { "epoch": 0.4980526237550171, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.243825912475586, "step": 2094 }, { "epoch": 0.4985283187156236, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.271630048751831, "step": 2096 }, { "epoch": 0.4990040136762301, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.2829158306121826, "step": 2098 }, { "epoch": 0.4994797086368366, "grad_norm": 0.40625, "learning_rate": 8e-05, "loss": 1.244653582572937, "step": 2100 }, { "epoch": 0.4999554035974431, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.2149741649627686, "step": 2102 }, { "epoch": 0.5004310985580497, "grad_norm": 0.404296875, "learning_rate": 8e-05, "loss": 1.2326526641845703, "step": 2104 }, { "epoch": 0.5009067935186562, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.2960628271102905, "step": 2106 }, { "epoch": 0.5013824884792627, "grad_norm": 0.40625, "learning_rate": 8e-05, "loss": 1.2571380138397217, "step": 2108 }, { "epoch": 0.5018581834398692, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.1806836128234863, "step": 2110 }, { "epoch": 0.5023338784004757, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.2098257541656494, "step": 2112 }, { "epoch": 0.5028095733610822, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.245821237564087, "step": 2114 }, { "epoch": 0.5032852683216887, "grad_norm": 0.37109375, "learning_rate": 8e-05, "loss": 1.2121808528900146, "step": 2116 }, { "epoch": 0.5037609632822952, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.271606683731079, "step": 2118 }, { "epoch": 0.5042366582429018, "grad_norm": 0.37109375, "learning_rate": 8e-05, "loss": 1.2480086088180542, "step": 2120 }, { "epoch": 0.5047123532035083, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.2532334327697754, "step": 2122 }, { "epoch": 0.5051880481641148, "grad_norm": 0.376953125, "learning_rate": 8e-05, "loss": 1.2482119798660278, "step": 2124 }, { "epoch": 0.5056637431247213, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.2053478956222534, "step": 2126 }, { "epoch": 0.5061394380853278, "grad_norm": 0.376953125, "learning_rate": 8e-05, "loss": 1.2333269119262695, "step": 2128 }, { "epoch": 0.5066151330459343, "grad_norm": 0.427734375, "learning_rate": 8e-05, "loss": 1.2575441598892212, "step": 2130 }, { "epoch": 0.5070908280065408, "grad_norm": 0.443359375, "learning_rate": 8e-05, "loss": 1.239387035369873, "step": 2132 }, { "epoch": 0.5075665229671473, "grad_norm": 0.435546875, "learning_rate": 8e-05, "loss": 1.2529371976852417, "step": 2134 }, { "epoch": 0.5080422179277538, "grad_norm": 0.439453125, "learning_rate": 8e-05, "loss": 1.2657462358474731, "step": 2136 }, { "epoch": 0.5085179128883603, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.2482880353927612, "step": 2138 }, { "epoch": 0.5089936078489669, "grad_norm": 0.404296875, "learning_rate": 8e-05, "loss": 1.2640414237976074, "step": 2140 }, { "epoch": 0.5094693028095734, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2489564418792725, "step": 2142 }, { "epoch": 0.5099449977701799, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.1927671432495117, "step": 2144 }, { "epoch": 0.5104206927307864, "grad_norm": 0.375, "learning_rate": 8e-05, "loss": 1.2743709087371826, "step": 2146 }, { "epoch": 0.5108963876913929, "grad_norm": 0.375, "learning_rate": 8e-05, "loss": 1.2421848773956299, "step": 2148 }, { "epoch": 0.5113720826519994, "grad_norm": 0.37890625, "learning_rate": 8e-05, "loss": 1.2825573682785034, "step": 2150 }, { "epoch": 0.5118477776126059, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2068567276000977, "step": 2152 }, { "epoch": 0.5123234725732124, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.2677295207977295, "step": 2154 }, { "epoch": 0.5127991675338189, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.3184689283370972, "step": 2156 }, { "epoch": 0.5132748624944254, "grad_norm": 0.3671875, "learning_rate": 8e-05, "loss": 1.2172642946243286, "step": 2158 }, { "epoch": 0.513750557455032, "grad_norm": 0.412109375, "learning_rate": 8e-05, "loss": 1.2620975971221924, "step": 2160 }, { "epoch": 0.5142262524156385, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.1976842880249023, "step": 2162 }, { "epoch": 0.514701947376245, "grad_norm": 0.412109375, "learning_rate": 8e-05, "loss": 1.2858420610427856, "step": 2164 }, { "epoch": 0.5151776423368515, "grad_norm": 0.408203125, "learning_rate": 8e-05, "loss": 1.1963508129119873, "step": 2166 }, { "epoch": 0.515653337297458, "grad_norm": 0.439453125, "learning_rate": 8e-05, "loss": 1.2777037620544434, "step": 2168 }, { "epoch": 0.5161290322580645, "grad_norm": 0.4140625, "learning_rate": 8e-05, "loss": 1.2379100322723389, "step": 2170 }, { "epoch": 0.516604727218671, "grad_norm": 0.421875, "learning_rate": 8e-05, "loss": 1.3012006282806396, "step": 2172 }, { "epoch": 0.5170804221792775, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.2644760608673096, "step": 2174 }, { "epoch": 0.517556117139884, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.2243112325668335, "step": 2176 }, { "epoch": 0.5180318121004905, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.282224416732788, "step": 2178 }, { "epoch": 0.518507507061097, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.2459665536880493, "step": 2180 }, { "epoch": 0.5189832020217036, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.2327321767807007, "step": 2182 }, { "epoch": 0.5194588969823101, "grad_norm": 0.376953125, "learning_rate": 8e-05, "loss": 1.2502856254577637, "step": 2184 }, { "epoch": 0.5199345919429166, "grad_norm": 0.37890625, "learning_rate": 8e-05, "loss": 1.2643475532531738, "step": 2186 }, { "epoch": 0.5204102869035231, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2353267669677734, "step": 2188 }, { "epoch": 0.5208859818641296, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.2246006727218628, "step": 2190 }, { "epoch": 0.5213616768247361, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2746915817260742, "step": 2192 }, { "epoch": 0.5218373717853426, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.2547008991241455, "step": 2194 }, { "epoch": 0.5223130667459491, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.2143683433532715, "step": 2196 }, { "epoch": 0.5227887617065556, "grad_norm": 0.376953125, "learning_rate": 8e-05, "loss": 1.2223198413848877, "step": 2198 }, { "epoch": 0.5232644566671621, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2719308137893677, "step": 2200 }, { "epoch": 0.5237401516277687, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.1987100839614868, "step": 2202 }, { "epoch": 0.5242158465883752, "grad_norm": 0.404296875, "learning_rate": 8e-05, "loss": 1.2595856189727783, "step": 2204 }, { "epoch": 0.5246915415489817, "grad_norm": 0.376953125, "learning_rate": 8e-05, "loss": 1.2550652027130127, "step": 2206 }, { "epoch": 0.5251672365095882, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.2664339542388916, "step": 2208 }, { "epoch": 0.5256429314701947, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.2030470371246338, "step": 2210 }, { "epoch": 0.5261186264308012, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.257197618484497, "step": 2212 }, { "epoch": 0.5265943213914077, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.2173645496368408, "step": 2214 }, { "epoch": 0.5270700163520142, "grad_norm": 0.412109375, "learning_rate": 8e-05, "loss": 1.22458815574646, "step": 2216 }, { "epoch": 0.5275457113126207, "grad_norm": 0.455078125, "learning_rate": 8e-05, "loss": 1.2425655126571655, "step": 2218 }, { "epoch": 0.5280214062732272, "grad_norm": 0.427734375, "learning_rate": 8e-05, "loss": 1.2677123546600342, "step": 2220 }, { "epoch": 0.5284971012338338, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.203295111656189, "step": 2222 }, { "epoch": 0.5289727961944403, "grad_norm": 0.4296875, "learning_rate": 8e-05, "loss": 1.260411262512207, "step": 2224 }, { "epoch": 0.5294484911550468, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.270648717880249, "step": 2226 }, { "epoch": 0.5299241861156533, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.2358521223068237, "step": 2228 }, { "epoch": 0.5303998810762598, "grad_norm": 0.4140625, "learning_rate": 8e-05, "loss": 1.2879903316497803, "step": 2230 }, { "epoch": 0.5308755760368664, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2541499137878418, "step": 2232 }, { "epoch": 0.5313512709974729, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.1964361667633057, "step": 2234 }, { "epoch": 0.5318269659580794, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.2229571342468262, "step": 2236 }, { "epoch": 0.5323026609186859, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.173935890197754, "step": 2238 }, { "epoch": 0.5327783558792925, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.2346327304840088, "step": 2240 }, { "epoch": 0.533254050839899, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.2555508613586426, "step": 2242 }, { "epoch": 0.5337297458005055, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.2254197597503662, "step": 2244 }, { "epoch": 0.534205440761112, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.2302241325378418, "step": 2246 }, { "epoch": 0.5346811357217185, "grad_norm": 0.37890625, "learning_rate": 8e-05, "loss": 1.2882341146469116, "step": 2248 }, { "epoch": 0.535156830682325, "grad_norm": 0.3671875, "learning_rate": 8e-05, "loss": 1.2494275569915771, "step": 2250 }, { "epoch": 0.5356325256429315, "grad_norm": 0.37890625, "learning_rate": 8e-05, "loss": 1.2567485570907593, "step": 2252 }, { "epoch": 0.536108220603538, "grad_norm": 0.388671875, "learning_rate": 8e-05, "loss": 1.2737491130828857, "step": 2254 }, { "epoch": 0.5365839155641445, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.2650439739227295, "step": 2256 }, { "epoch": 0.537059610524751, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.2500125169754028, "step": 2258 }, { "epoch": 0.5375353054853576, "grad_norm": 0.37109375, "learning_rate": 8e-05, "loss": 1.2558541297912598, "step": 2260 }, { "epoch": 0.5380110004459641, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.2639573812484741, "step": 2262 }, { "epoch": 0.5384866954065706, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2046759128570557, "step": 2264 }, { "epoch": 0.5389623903671771, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.3005247116088867, "step": 2266 }, { "epoch": 0.5394380853277836, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.2678768634796143, "step": 2268 }, { "epoch": 0.5399137802883901, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2596511840820312, "step": 2270 }, { "epoch": 0.5403894752489966, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.2320735454559326, "step": 2272 }, { "epoch": 0.5408651702096031, "grad_norm": 0.423828125, "learning_rate": 8e-05, "loss": 1.2533533573150635, "step": 2274 }, { "epoch": 0.5413408651702096, "grad_norm": 0.40625, "learning_rate": 8e-05, "loss": 1.2232404947280884, "step": 2276 }, { "epoch": 0.5418165601308161, "grad_norm": 0.412109375, "learning_rate": 8e-05, "loss": 1.2465150356292725, "step": 2278 }, { "epoch": 0.5422922550914226, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.2359066009521484, "step": 2280 }, { "epoch": 0.5427679500520292, "grad_norm": 0.373046875, "learning_rate": 8e-05, "loss": 1.179032325744629, "step": 2282 }, { "epoch": 0.5432436450126357, "grad_norm": 0.37890625, "learning_rate": 8e-05, "loss": 1.2295031547546387, "step": 2284 }, { "epoch": 0.5437193399732422, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.2600789070129395, "step": 2286 }, { "epoch": 0.5441950349338487, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.2444316148757935, "step": 2288 }, { "epoch": 0.5446707298944552, "grad_norm": 0.3671875, "learning_rate": 8e-05, "loss": 1.1947736740112305, "step": 2290 }, { "epoch": 0.5451464248550617, "grad_norm": 0.3671875, "learning_rate": 8e-05, "loss": 1.2450361251831055, "step": 2292 }, { "epoch": 0.5456221198156682, "grad_norm": 0.3671875, "learning_rate": 8e-05, "loss": 1.189820647239685, "step": 2294 }, { "epoch": 0.5460978147762747, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2700152397155762, "step": 2296 }, { "epoch": 0.5465735097368812, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.1997301578521729, "step": 2298 }, { "epoch": 0.5470492046974877, "grad_norm": 0.37109375, "learning_rate": 8e-05, "loss": 1.2718980312347412, "step": 2300 }, { "epoch": 0.5475248996580943, "grad_norm": 0.408203125, "learning_rate": 8e-05, "loss": 1.2360846996307373, "step": 2302 }, { "epoch": 0.5480005946187008, "grad_norm": 0.419921875, "learning_rate": 8e-05, "loss": 1.2985812425613403, "step": 2304 }, { "epoch": 0.5484762895793073, "grad_norm": 0.40625, "learning_rate": 8e-05, "loss": 1.2672054767608643, "step": 2306 }, { "epoch": 0.5489519845399138, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.1932951211929321, "step": 2308 }, { "epoch": 0.5494276795005203, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.2733159065246582, "step": 2310 }, { "epoch": 0.5499033744611268, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.3332585096359253, "step": 2312 }, { "epoch": 0.5503790694217333, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.267357587814331, "step": 2314 }, { "epoch": 0.5508547643823398, "grad_norm": 0.388671875, "learning_rate": 8e-05, "loss": 1.2183654308319092, "step": 2316 }, { "epoch": 0.5513304593429463, "grad_norm": 0.376953125, "learning_rate": 8e-05, "loss": 1.244970440864563, "step": 2318 }, { "epoch": 0.5518061543035528, "grad_norm": 0.376953125, "learning_rate": 8e-05, "loss": 1.2315990924835205, "step": 2320 }, { "epoch": 0.5522818492641594, "grad_norm": 0.388671875, "learning_rate": 8e-05, "loss": 1.251598834991455, "step": 2322 }, { "epoch": 0.5527575442247659, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.277734637260437, "step": 2324 }, { "epoch": 0.5532332391853724, "grad_norm": 0.4140625, "learning_rate": 8e-05, "loss": 1.27321195602417, "step": 2326 }, { "epoch": 0.5537089341459789, "grad_norm": 0.40625, "learning_rate": 8e-05, "loss": 1.2716056108474731, "step": 2328 }, { "epoch": 0.5541846291065854, "grad_norm": 0.373046875, "learning_rate": 8e-05, "loss": 1.2450883388519287, "step": 2330 }, { "epoch": 0.5546603240671919, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.2274010181427002, "step": 2332 }, { "epoch": 0.5551360190277984, "grad_norm": 0.427734375, "learning_rate": 8e-05, "loss": 1.2627809047698975, "step": 2334 }, { "epoch": 0.5556117139884049, "grad_norm": 0.404296875, "learning_rate": 8e-05, "loss": 1.1710472106933594, "step": 2336 }, { "epoch": 0.5560874089490114, "grad_norm": 0.388671875, "learning_rate": 8e-05, "loss": 1.2715725898742676, "step": 2338 }, { "epoch": 0.5565631039096179, "grad_norm": 0.369140625, "learning_rate": 8e-05, "loss": 1.2005977630615234, "step": 2340 }, { "epoch": 0.5570387988702244, "grad_norm": 0.388671875, "learning_rate": 8e-05, "loss": 1.254575252532959, "step": 2342 }, { "epoch": 0.557514493830831, "grad_norm": 0.37109375, "learning_rate": 8e-05, "loss": 1.2813735008239746, "step": 2344 }, { "epoch": 0.5579901887914375, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.2456145286560059, "step": 2346 }, { "epoch": 0.558465883752044, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.2311184406280518, "step": 2348 }, { "epoch": 0.5589415787126505, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.2404234409332275, "step": 2350 }, { "epoch": 0.559417273673257, "grad_norm": 0.404296875, "learning_rate": 8e-05, "loss": 1.280110239982605, "step": 2352 }, { "epoch": 0.5598929686338635, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.250441551208496, "step": 2354 }, { "epoch": 0.56036866359447, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2462382316589355, "step": 2356 }, { "epoch": 0.5608443585550765, "grad_norm": 0.375, "learning_rate": 8e-05, "loss": 1.2737480401992798, "step": 2358 }, { "epoch": 0.561320053515683, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.276233434677124, "step": 2360 }, { "epoch": 0.5617957484762895, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.2580044269561768, "step": 2362 }, { "epoch": 0.5622714434368961, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.247071385383606, "step": 2364 }, { "epoch": 0.5627471383975026, "grad_norm": 0.373046875, "learning_rate": 8e-05, "loss": 1.2175970077514648, "step": 2366 }, { "epoch": 0.5632228333581091, "grad_norm": 0.359375, "learning_rate": 8e-05, "loss": 1.217498540878296, "step": 2368 }, { "epoch": 0.5636985283187156, "grad_norm": 0.37890625, "learning_rate": 8e-05, "loss": 1.242640495300293, "step": 2370 }, { "epoch": 0.5641742232793221, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.2354379892349243, "step": 2372 }, { "epoch": 0.5646499182399286, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.3069782257080078, "step": 2374 }, { "epoch": 0.5651256132005351, "grad_norm": 0.388671875, "learning_rate": 8e-05, "loss": 1.2084176540374756, "step": 2376 }, { "epoch": 0.5656013081611416, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.2104275226593018, "step": 2378 }, { "epoch": 0.5660770031217481, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2741985321044922, "step": 2380 }, { "epoch": 0.5665526980823546, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.230583906173706, "step": 2382 }, { "epoch": 0.5670283930429612, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.1865020990371704, "step": 2384 }, { "epoch": 0.5675040880035677, "grad_norm": 0.375, "learning_rate": 8e-05, "loss": 1.2901579141616821, "step": 2386 }, { "epoch": 0.5679797829641742, "grad_norm": 0.40625, "learning_rate": 8e-05, "loss": 1.2829887866973877, "step": 2388 }, { "epoch": 0.5684554779247807, "grad_norm": 0.412109375, "learning_rate": 8e-05, "loss": 1.2711780071258545, "step": 2390 }, { "epoch": 0.5689311728853872, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2897326946258545, "step": 2392 }, { "epoch": 0.5694068678459937, "grad_norm": 0.37890625, "learning_rate": 8e-05, "loss": 1.2700235843658447, "step": 2394 }, { "epoch": 0.5698825628066002, "grad_norm": 0.35546875, "learning_rate": 8e-05, "loss": 1.2746386528015137, "step": 2396 }, { "epoch": 0.5703582577672068, "grad_norm": 0.349609375, "learning_rate": 8e-05, "loss": 1.2726595401763916, "step": 2398 }, { "epoch": 0.5708339527278133, "grad_norm": 0.376953125, "learning_rate": 8e-05, "loss": 1.2205541133880615, "step": 2400 }, { "epoch": 0.5713096476884199, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2636919021606445, "step": 2402 }, { "epoch": 0.5717853426490264, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.2315757274627686, "step": 2404 }, { "epoch": 0.5722610376096329, "grad_norm": 0.3671875, "learning_rate": 8e-05, "loss": 1.2510037422180176, "step": 2406 }, { "epoch": 0.5727367325702394, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2190053462982178, "step": 2408 }, { "epoch": 0.5732124275308459, "grad_norm": 0.357421875, "learning_rate": 8e-05, "loss": 1.2241978645324707, "step": 2410 }, { "epoch": 0.5736881224914524, "grad_norm": 0.376953125, "learning_rate": 8e-05, "loss": 1.2733025550842285, "step": 2412 }, { "epoch": 0.5741638174520589, "grad_norm": 0.3671875, "learning_rate": 8e-05, "loss": 1.1712183952331543, "step": 2414 }, { "epoch": 0.5746395124126654, "grad_norm": 0.37890625, "learning_rate": 8e-05, "loss": 1.2777495384216309, "step": 2416 }, { "epoch": 0.5751152073732719, "grad_norm": 0.37109375, "learning_rate": 8e-05, "loss": 1.272843837738037, "step": 2418 }, { "epoch": 0.5755909023338784, "grad_norm": 0.3671875, "learning_rate": 8e-05, "loss": 1.2414473295211792, "step": 2420 }, { "epoch": 0.576066597294485, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.2244441509246826, "step": 2422 }, { "epoch": 0.5765422922550915, "grad_norm": 0.404296875, "learning_rate": 8e-05, "loss": 1.2712140083312988, "step": 2424 }, { "epoch": 0.577017987215698, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.2499234676361084, "step": 2426 }, { "epoch": 0.5774936821763045, "grad_norm": 0.404296875, "learning_rate": 8e-05, "loss": 1.286064624786377, "step": 2428 }, { "epoch": 0.577969377136911, "grad_norm": 0.373046875, "learning_rate": 8e-05, "loss": 1.2061920166015625, "step": 2430 }, { "epoch": 0.5784450720975175, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2993005514144897, "step": 2432 }, { "epoch": 0.578920767058124, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.26462721824646, "step": 2434 }, { "epoch": 0.5793964620187305, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.2716201543807983, "step": 2436 }, { "epoch": 0.579872156979337, "grad_norm": 0.376953125, "learning_rate": 8e-05, "loss": 1.2104942798614502, "step": 2438 }, { "epoch": 0.5803478519399435, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.2860119342803955, "step": 2440 }, { "epoch": 0.58082354690055, "grad_norm": 0.421875, "learning_rate": 8e-05, "loss": 1.3008224964141846, "step": 2442 }, { "epoch": 0.5812992418611566, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.2540391683578491, "step": 2444 }, { "epoch": 0.5817749368217631, "grad_norm": 0.427734375, "learning_rate": 8e-05, "loss": 1.2667243480682373, "step": 2446 }, { "epoch": 0.5822506317823696, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.2277895212173462, "step": 2448 }, { "epoch": 0.5827263267429761, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.2290725708007812, "step": 2450 }, { "epoch": 0.5832020217035826, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.227077841758728, "step": 2452 }, { "epoch": 0.5836777166641891, "grad_norm": 0.3671875, "learning_rate": 8e-05, "loss": 1.2680089473724365, "step": 2454 }, { "epoch": 0.5841534116247956, "grad_norm": 0.36328125, "learning_rate": 8e-05, "loss": 1.236987590789795, "step": 2456 }, { "epoch": 0.5846291065854021, "grad_norm": 0.37890625, "learning_rate": 8e-05, "loss": 1.255210518836975, "step": 2458 }, { "epoch": 0.5851048015460086, "grad_norm": 0.388671875, "learning_rate": 8e-05, "loss": 1.26492440700531, "step": 2460 }, { "epoch": 0.5855804965066151, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.2221426963806152, "step": 2462 }, { "epoch": 0.5860561914672217, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.2715880870819092, "step": 2464 }, { "epoch": 0.5865318864278282, "grad_norm": 0.3671875, "learning_rate": 8e-05, "loss": 1.2061941623687744, "step": 2466 }, { "epoch": 0.5870075813884347, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.1762068271636963, "step": 2468 }, { "epoch": 0.5874832763490412, "grad_norm": 0.41015625, "learning_rate": 8e-05, "loss": 1.2648086547851562, "step": 2470 }, { "epoch": 0.5879589713096477, "grad_norm": 0.376953125, "learning_rate": 8e-05, "loss": 1.2263320684432983, "step": 2472 }, { "epoch": 0.5884346662702542, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.2949507236480713, "step": 2474 }, { "epoch": 0.5889103612308607, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.2580509185791016, "step": 2476 }, { "epoch": 0.5893860561914672, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.2583489418029785, "step": 2478 }, { "epoch": 0.5898617511520737, "grad_norm": 0.369140625, "learning_rate": 8e-05, "loss": 1.2290489673614502, "step": 2480 }, { "epoch": 0.5903374461126802, "grad_norm": 0.35546875, "learning_rate": 8e-05, "loss": 1.1821609735488892, "step": 2482 }, { "epoch": 0.5908131410732868, "grad_norm": 0.37109375, "learning_rate": 8e-05, "loss": 1.265315294265747, "step": 2484 }, { "epoch": 0.5912888360338933, "grad_norm": 0.3671875, "learning_rate": 8e-05, "loss": 1.2531765699386597, "step": 2486 }, { "epoch": 0.5917645309944998, "grad_norm": 0.373046875, "learning_rate": 8e-05, "loss": 1.2487919330596924, "step": 2488 }, { "epoch": 0.5922402259551063, "grad_norm": 0.3671875, "learning_rate": 8e-05, "loss": 1.2386280298233032, "step": 2490 }, { "epoch": 0.5927159209157128, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.2352405786514282, "step": 2492 }, { "epoch": 0.5931916158763193, "grad_norm": 0.408203125, "learning_rate": 8e-05, "loss": 1.2241952419281006, "step": 2494 }, { "epoch": 0.5936673108369258, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.2149953842163086, "step": 2496 }, { "epoch": 0.5941430057975323, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.2526779174804688, "step": 2498 }, { "epoch": 0.5946187007581388, "grad_norm": 0.40625, "learning_rate": 8e-05, "loss": 1.2221908569335938, "step": 2500 }, { "epoch": 0.5950943957187453, "grad_norm": 0.373046875, "learning_rate": 8e-05, "loss": 1.2482354640960693, "step": 2502 }, { "epoch": 0.5955700906793518, "grad_norm": 0.40625, "learning_rate": 8e-05, "loss": 1.189880609512329, "step": 2504 }, { "epoch": 0.5960457856399584, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2685422897338867, "step": 2506 }, { "epoch": 0.5965214806005649, "grad_norm": 0.365234375, "learning_rate": 8e-05, "loss": 1.2144076824188232, "step": 2508 }, { "epoch": 0.5969971755611714, "grad_norm": 0.37890625, "learning_rate": 8e-05, "loss": 1.1419060230255127, "step": 2510 }, { "epoch": 0.5974728705217779, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.2288837432861328, "step": 2512 }, { "epoch": 0.5979485654823844, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.2173268795013428, "step": 2514 }, { "epoch": 0.5984242604429909, "grad_norm": 0.408203125, "learning_rate": 8e-05, "loss": 1.2456581592559814, "step": 2516 }, { "epoch": 0.5988999554035974, "grad_norm": 0.412109375, "learning_rate": 8e-05, "loss": 1.310453176498413, "step": 2518 }, { "epoch": 0.5993756503642039, "grad_norm": 0.431640625, "learning_rate": 8e-05, "loss": 1.271226406097412, "step": 2520 }, { "epoch": 0.5998513453248104, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.2474400997161865, "step": 2522 }, { "epoch": 0.600327040285417, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.2638752460479736, "step": 2524 }, { "epoch": 0.6008027352460235, "grad_norm": 0.404296875, "learning_rate": 8e-05, "loss": 1.2449238300323486, "step": 2526 }, { "epoch": 0.60127843020663, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.2357442378997803, "step": 2528 }, { "epoch": 0.6017541251672365, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.2108559608459473, "step": 2530 }, { "epoch": 0.602229820127843, "grad_norm": 0.388671875, "learning_rate": 8e-05, "loss": 1.2793264389038086, "step": 2532 }, { "epoch": 0.6027055150884495, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.2260792255401611, "step": 2534 }, { "epoch": 0.603181210049056, "grad_norm": 0.369140625, "learning_rate": 8e-05, "loss": 1.2476468086242676, "step": 2536 }, { "epoch": 0.6036569050096625, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2396502494812012, "step": 2538 }, { "epoch": 0.604132599970269, "grad_norm": 0.37890625, "learning_rate": 8e-05, "loss": 1.2091515064239502, "step": 2540 }, { "epoch": 0.6046082949308755, "grad_norm": 0.3671875, "learning_rate": 8e-05, "loss": 1.2249176502227783, "step": 2542 }, { "epoch": 0.605083989891482, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.274390459060669, "step": 2544 }, { "epoch": 0.6055596848520886, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2077170610427856, "step": 2546 }, { "epoch": 0.6060353798126951, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.2330358028411865, "step": 2548 }, { "epoch": 0.6065110747733016, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.2728490829467773, "step": 2550 }, { "epoch": 0.6069867697339081, "grad_norm": 0.35546875, "learning_rate": 8e-05, "loss": 1.1577916145324707, "step": 2552 }, { "epoch": 0.6074624646945146, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.2705035209655762, "step": 2554 }, { "epoch": 0.6079381596551211, "grad_norm": 0.369140625, "learning_rate": 8e-05, "loss": 1.2194724082946777, "step": 2556 }, { "epoch": 0.6084138546157276, "grad_norm": 0.36328125, "learning_rate": 8e-05, "loss": 1.223502278327942, "step": 2558 }, { "epoch": 0.6088895495763341, "grad_norm": 0.365234375, "learning_rate": 8e-05, "loss": 1.2284711599349976, "step": 2560 }, { "epoch": 0.6093652445369407, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.2612671852111816, "step": 2562 }, { "epoch": 0.6098409394975473, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.2475411891937256, "step": 2564 }, { "epoch": 0.6103166344581538, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.2346876859664917, "step": 2566 }, { "epoch": 0.6107923294187603, "grad_norm": 0.37890625, "learning_rate": 8e-05, "loss": 1.1875958442687988, "step": 2568 }, { "epoch": 0.6112680243793668, "grad_norm": 0.373046875, "learning_rate": 8e-05, "loss": 1.2490816116333008, "step": 2570 }, { "epoch": 0.6117437193399733, "grad_norm": 0.423828125, "learning_rate": 8e-05, "loss": 1.2314293384552002, "step": 2572 }, { "epoch": 0.6122194143005798, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.2150312662124634, "step": 2574 }, { "epoch": 0.6126951092611863, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2333028316497803, "step": 2576 }, { "epoch": 0.6131708042217928, "grad_norm": 0.376953125, "learning_rate": 8e-05, "loss": 1.2211978435516357, "step": 2578 }, { "epoch": 0.6136464991823993, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.2382583618164062, "step": 2580 }, { "epoch": 0.6141221941430058, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.2812304496765137, "step": 2582 }, { "epoch": 0.6145978891036123, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.2513656616210938, "step": 2584 }, { "epoch": 0.6150735840642189, "grad_norm": 0.4140625, "learning_rate": 8e-05, "loss": 1.2187235355377197, "step": 2586 }, { "epoch": 0.6155492790248254, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.2611520290374756, "step": 2588 }, { "epoch": 0.6160249739854319, "grad_norm": 0.376953125, "learning_rate": 8e-05, "loss": 1.2477173805236816, "step": 2590 }, { "epoch": 0.6165006689460384, "grad_norm": 0.35546875, "learning_rate": 8e-05, "loss": 1.1624467372894287, "step": 2592 }, { "epoch": 0.6169763639066449, "grad_norm": 0.359375, "learning_rate": 8e-05, "loss": 1.2253656387329102, "step": 2594 }, { "epoch": 0.6174520588672514, "grad_norm": 0.373046875, "learning_rate": 8e-05, "loss": 1.2614085674285889, "step": 2596 }, { "epoch": 0.6179277538278579, "grad_norm": 0.369140625, "learning_rate": 8e-05, "loss": 1.1892552375793457, "step": 2598 }, { "epoch": 0.6184034487884644, "grad_norm": 0.365234375, "learning_rate": 8e-05, "loss": 1.304673671722412, "step": 2600 }, { "epoch": 0.6188791437490709, "grad_norm": 0.3671875, "learning_rate": 8e-05, "loss": 1.1897804737091064, "step": 2602 }, { "epoch": 0.6193548387096774, "grad_norm": 0.3671875, "learning_rate": 8e-05, "loss": 1.2602784633636475, "step": 2604 }, { "epoch": 0.619830533670284, "grad_norm": 0.35546875, "learning_rate": 8e-05, "loss": 1.1673520803451538, "step": 2606 }, { "epoch": 0.6203062286308905, "grad_norm": 0.36328125, "learning_rate": 8e-05, "loss": 1.2243266105651855, "step": 2608 }, { "epoch": 0.620781923591497, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.2490243911743164, "step": 2610 }, { "epoch": 0.6212576185521035, "grad_norm": 0.373046875, "learning_rate": 8e-05, "loss": 1.1751642227172852, "step": 2612 }, { "epoch": 0.62173331351271, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.2147471904754639, "step": 2614 }, { "epoch": 0.6222090084733165, "grad_norm": 0.3671875, "learning_rate": 8e-05, "loss": 1.2540574073791504, "step": 2616 }, { "epoch": 0.622684703433923, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2662967443466187, "step": 2618 }, { "epoch": 0.6231603983945295, "grad_norm": 0.373046875, "learning_rate": 8e-05, "loss": 1.2161445617675781, "step": 2620 }, { "epoch": 0.623636093355136, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.2706882953643799, "step": 2622 }, { "epoch": 0.6241117883157425, "grad_norm": 0.375, "learning_rate": 8e-05, "loss": 1.2533507347106934, "step": 2624 }, { "epoch": 0.624587483276349, "grad_norm": 0.375, "learning_rate": 8e-05, "loss": 1.2325465679168701, "step": 2626 }, { "epoch": 0.6250631782369556, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2847120761871338, "step": 2628 }, { "epoch": 0.6255388731975621, "grad_norm": 0.359375, "learning_rate": 8e-05, "loss": 1.2086182832717896, "step": 2630 }, { "epoch": 0.6260145681581686, "grad_norm": 0.388671875, "learning_rate": 8e-05, "loss": 1.2454938888549805, "step": 2632 }, { "epoch": 0.6264902631187751, "grad_norm": 0.37890625, "learning_rate": 8e-05, "loss": 1.253279685974121, "step": 2634 }, { "epoch": 0.6269659580793816, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.200972318649292, "step": 2636 }, { "epoch": 0.6274416530399881, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.2435922622680664, "step": 2638 }, { "epoch": 0.6279173480005946, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.2706129550933838, "step": 2640 }, { "epoch": 0.6283930429612011, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.2585011720657349, "step": 2642 }, { "epoch": 0.6288687379218076, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.2510229349136353, "step": 2644 }, { "epoch": 0.6293444328824142, "grad_norm": 0.37890625, "learning_rate": 8e-05, "loss": 1.2418980598449707, "step": 2646 }, { "epoch": 0.6298201278430207, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.2494986057281494, "step": 2648 }, { "epoch": 0.6302958228036272, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.2714817523956299, "step": 2650 }, { "epoch": 0.6307715177642337, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.2182557582855225, "step": 2652 }, { "epoch": 0.6312472127248402, "grad_norm": 0.37890625, "learning_rate": 8e-05, "loss": 1.2318391799926758, "step": 2654 }, { "epoch": 0.6317229076854467, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.2475109100341797, "step": 2656 }, { "epoch": 0.6321986026460532, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.2712761163711548, "step": 2658 }, { "epoch": 0.6326742976066597, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2394472360610962, "step": 2660 }, { "epoch": 0.6331499925672662, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.2201759815216064, "step": 2662 }, { "epoch": 0.6336256875278727, "grad_norm": 0.369140625, "learning_rate": 8e-05, "loss": 1.1588757038116455, "step": 2664 }, { "epoch": 0.6341013824884792, "grad_norm": 0.349609375, "learning_rate": 8e-05, "loss": 1.1701884269714355, "step": 2666 }, { "epoch": 0.6345770774490858, "grad_norm": 0.359375, "learning_rate": 8e-05, "loss": 1.2547426223754883, "step": 2668 }, { "epoch": 0.6350527724096923, "grad_norm": 0.3671875, "learning_rate": 8e-05, "loss": 1.2501137256622314, "step": 2670 }, { "epoch": 0.6355284673702988, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2706825733184814, "step": 2672 }, { "epoch": 0.6360041623309053, "grad_norm": 0.3671875, "learning_rate": 8e-05, "loss": 1.2127528190612793, "step": 2674 }, { "epoch": 0.6364798572915118, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.2191646099090576, "step": 2676 }, { "epoch": 0.6369555522521183, "grad_norm": 0.369140625, "learning_rate": 8e-05, "loss": 1.2211954593658447, "step": 2678 }, { "epoch": 0.6374312472127248, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2068610191345215, "step": 2680 }, { "epoch": 0.6379069421733313, "grad_norm": 0.376953125, "learning_rate": 8e-05, "loss": 1.2012649774551392, "step": 2682 }, { "epoch": 0.6383826371339378, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.2034168243408203, "step": 2684 }, { "epoch": 0.6388583320945443, "grad_norm": 0.359375, "learning_rate": 8e-05, "loss": 1.1512229442596436, "step": 2686 }, { "epoch": 0.6393340270551509, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.273275375366211, "step": 2688 }, { "epoch": 0.6398097220157574, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2607433795928955, "step": 2690 }, { "epoch": 0.6402854169763639, "grad_norm": 0.375, "learning_rate": 8e-05, "loss": 1.2025877237319946, "step": 2692 }, { "epoch": 0.6407611119369704, "grad_norm": 0.375, "learning_rate": 8e-05, "loss": 1.237597942352295, "step": 2694 }, { "epoch": 0.6412368068975769, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.2055954933166504, "step": 2696 }, { "epoch": 0.6417125018581834, "grad_norm": 0.359375, "learning_rate": 8e-05, "loss": 1.2079732418060303, "step": 2698 }, { "epoch": 0.6421881968187899, "grad_norm": 0.3515625, "learning_rate": 8e-05, "loss": 1.2707421779632568, "step": 2700 }, { "epoch": 0.6426638917793964, "grad_norm": 0.388671875, "learning_rate": 8e-05, "loss": 1.229077696800232, "step": 2702 }, { "epoch": 0.6431395867400029, "grad_norm": 0.388671875, "learning_rate": 8e-05, "loss": 1.171201229095459, "step": 2704 }, { "epoch": 0.6436152817006094, "grad_norm": 0.349609375, "learning_rate": 8e-05, "loss": 1.2386970520019531, "step": 2706 }, { "epoch": 0.644090976661216, "grad_norm": 0.373046875, "learning_rate": 8e-05, "loss": 1.2508089542388916, "step": 2708 }, { "epoch": 0.6445666716218225, "grad_norm": 0.361328125, "learning_rate": 8e-05, "loss": 1.2166051864624023, "step": 2710 }, { "epoch": 0.645042366582429, "grad_norm": 0.36328125, "learning_rate": 8e-05, "loss": 1.1718792915344238, "step": 2712 }, { "epoch": 0.6455180615430355, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.208460807800293, "step": 2714 }, { "epoch": 0.645993756503642, "grad_norm": 0.357421875, "learning_rate": 8e-05, "loss": 1.2658112049102783, "step": 2716 }, { "epoch": 0.6464694514642485, "grad_norm": 0.359375, "learning_rate": 8e-05, "loss": 1.2218315601348877, "step": 2718 }, { "epoch": 0.646945146424855, "grad_norm": 0.36328125, "learning_rate": 8e-05, "loss": 1.2742373943328857, "step": 2720 }, { "epoch": 0.6474208413854615, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.2009773254394531, "step": 2722 }, { "epoch": 0.647896536346068, "grad_norm": 0.375, "learning_rate": 8e-05, "loss": 1.2288341522216797, "step": 2724 }, { "epoch": 0.6483722313066745, "grad_norm": 0.388671875, "learning_rate": 8e-05, "loss": 1.2737244367599487, "step": 2726 }, { "epoch": 0.6488479262672812, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.289430856704712, "step": 2728 }, { "epoch": 0.6493236212278877, "grad_norm": 0.4140625, "learning_rate": 8e-05, "loss": 1.2465755939483643, "step": 2730 }, { "epoch": 0.6497993161884942, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.2071137428283691, "step": 2732 }, { "epoch": 0.6502750111491007, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.194615125656128, "step": 2734 }, { "epoch": 0.6507507061097072, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2709908485412598, "step": 2736 }, { "epoch": 0.6512264010703137, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.2470756769180298, "step": 2738 }, { "epoch": 0.6517020960309202, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2120921611785889, "step": 2740 }, { "epoch": 0.6521777909915267, "grad_norm": 0.373046875, "learning_rate": 8e-05, "loss": 1.2401468753814697, "step": 2742 }, { "epoch": 0.6526534859521332, "grad_norm": 0.376953125, "learning_rate": 8e-05, "loss": 1.2416322231292725, "step": 2744 }, { "epoch": 0.6531291809127397, "grad_norm": 0.376953125, "learning_rate": 8e-05, "loss": 1.1909356117248535, "step": 2746 }, { "epoch": 0.6536048758733463, "grad_norm": 0.37109375, "learning_rate": 8e-05, "loss": 1.2629019021987915, "step": 2748 }, { "epoch": 0.6540805708339528, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.2401649951934814, "step": 2750 }, { "epoch": 0.6545562657945593, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.2681682109832764, "step": 2752 }, { "epoch": 0.6550319607551658, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2728466987609863, "step": 2754 }, { "epoch": 0.6555076557157723, "grad_norm": 0.361328125, "learning_rate": 8e-05, "loss": 1.223940372467041, "step": 2756 }, { "epoch": 0.6559833506763788, "grad_norm": 0.373046875, "learning_rate": 8e-05, "loss": 1.2075097560882568, "step": 2758 }, { "epoch": 0.6564590456369853, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.266689419746399, "step": 2760 }, { "epoch": 0.6569347405975918, "grad_norm": 0.408203125, "learning_rate": 8e-05, "loss": 1.2576415538787842, "step": 2762 }, { "epoch": 0.6574104355581983, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.2286550998687744, "step": 2764 }, { "epoch": 0.6578861305188048, "grad_norm": 0.388671875, "learning_rate": 8e-05, "loss": 1.2462449073791504, "step": 2766 }, { "epoch": 0.6583618254794114, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.2215523719787598, "step": 2768 }, { "epoch": 0.6588375204400179, "grad_norm": 0.37109375, "learning_rate": 8e-05, "loss": 1.200398325920105, "step": 2770 }, { "epoch": 0.6593132154006244, "grad_norm": 0.373046875, "learning_rate": 8e-05, "loss": 1.2600317001342773, "step": 2772 }, { "epoch": 0.6597889103612309, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.2789270877838135, "step": 2774 }, { "epoch": 0.6602646053218374, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.252886176109314, "step": 2776 }, { "epoch": 0.6607403002824439, "grad_norm": 0.408203125, "learning_rate": 8e-05, "loss": 1.1967723369598389, "step": 2778 }, { "epoch": 0.6612159952430504, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.171466588973999, "step": 2780 }, { "epoch": 0.6616916902036569, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2854139804840088, "step": 2782 }, { "epoch": 0.6621673851642634, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.259742021560669, "step": 2784 }, { "epoch": 0.6626430801248699, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2996937036514282, "step": 2786 }, { "epoch": 0.6631187750854765, "grad_norm": 0.369140625, "learning_rate": 8e-05, "loss": 1.2012677192687988, "step": 2788 }, { "epoch": 0.663594470046083, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.2383891344070435, "step": 2790 }, { "epoch": 0.6640701650066895, "grad_norm": 0.373046875, "learning_rate": 8e-05, "loss": 1.2412991523742676, "step": 2792 }, { "epoch": 0.664545859967296, "grad_norm": 0.431640625, "learning_rate": 8e-05, "loss": 1.2173049449920654, "step": 2794 }, { "epoch": 0.6650215549279025, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.2093093395233154, "step": 2796 }, { "epoch": 0.665497249888509, "grad_norm": 0.37109375, "learning_rate": 8e-05, "loss": 1.2188637256622314, "step": 2798 }, { "epoch": 0.6659729448491155, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.2203969955444336, "step": 2800 }, { "epoch": 0.666448639809722, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.2513853311538696, "step": 2802 }, { "epoch": 0.6669243347703285, "grad_norm": 0.40625, "learning_rate": 8e-05, "loss": 1.1890287399291992, "step": 2804 }, { "epoch": 0.667400029730935, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.2430322170257568, "step": 2806 }, { "epoch": 0.6678757246915416, "grad_norm": 0.388671875, "learning_rate": 8e-05, "loss": 1.2834746837615967, "step": 2808 }, { "epoch": 0.6683514196521481, "grad_norm": 0.388671875, "learning_rate": 8e-05, "loss": 1.2333581447601318, "step": 2810 }, { "epoch": 0.6688271146127546, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.2029738426208496, "step": 2812 }, { "epoch": 0.6693028095733611, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.2190194129943848, "step": 2814 }, { "epoch": 0.6697785045339676, "grad_norm": 0.4140625, "learning_rate": 8e-05, "loss": 1.2467260360717773, "step": 2816 }, { "epoch": 0.6702541994945741, "grad_norm": 0.412109375, "learning_rate": 8e-05, "loss": 1.2383447885513306, "step": 2818 }, { "epoch": 0.6707298944551806, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.2235246896743774, "step": 2820 }, { "epoch": 0.6712055894157871, "grad_norm": 0.373046875, "learning_rate": 8e-05, "loss": 1.2301299571990967, "step": 2822 }, { "epoch": 0.6716812843763936, "grad_norm": 0.373046875, "learning_rate": 8e-05, "loss": 1.2388970851898193, "step": 2824 }, { "epoch": 0.6721569793370001, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.2599682807922363, "step": 2826 }, { "epoch": 0.6726326742976066, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.2594590187072754, "step": 2828 }, { "epoch": 0.6731083692582132, "grad_norm": 0.3828125, "learning_rate": 8e-05, "loss": 1.2377604246139526, "step": 2830 }, { "epoch": 0.6735840642188197, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.2168340682983398, "step": 2832 }, { "epoch": 0.6740597591794262, "grad_norm": 0.365234375, "learning_rate": 8e-05, "loss": 1.1372761726379395, "step": 2834 }, { "epoch": 0.6745354541400327, "grad_norm": 0.373046875, "learning_rate": 8e-05, "loss": 1.17765212059021, "step": 2836 }, { "epoch": 0.6750111491006392, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.235781192779541, "step": 2838 }, { "epoch": 0.6754868440612457, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.243680715560913, "step": 2840 }, { "epoch": 0.6759625390218522, "grad_norm": 0.375, "learning_rate": 8e-05, "loss": 1.2039899826049805, "step": 2842 }, { "epoch": 0.6764382339824587, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.273780345916748, "step": 2844 }, { "epoch": 0.6769139289430652, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.1936399936676025, "step": 2846 }, { "epoch": 0.6773896239036717, "grad_norm": 0.37890625, "learning_rate": 8e-05, "loss": 1.1913855075836182, "step": 2848 }, { "epoch": 0.6778653188642783, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.2655634880065918, "step": 2850 }, { "epoch": 0.6783410138248848, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.229090690612793, "step": 2852 }, { "epoch": 0.6788167087854913, "grad_norm": 0.388671875, "learning_rate": 8e-05, "loss": 1.229933738708496, "step": 2854 }, { "epoch": 0.6792924037460978, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.2448334693908691, "step": 2856 }, { "epoch": 0.6797680987067043, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.1777703762054443, "step": 2858 }, { "epoch": 0.6802437936673108, "grad_norm": 0.390625, "learning_rate": 8e-05, "loss": 1.1988234519958496, "step": 2860 }, { "epoch": 0.6807194886279173, "grad_norm": 0.357421875, "learning_rate": 8e-05, "loss": 1.2633662223815918, "step": 2862 }, { "epoch": 0.6811951835885238, "grad_norm": 0.396484375, "learning_rate": 8e-05, "loss": 1.1914260387420654, "step": 2864 }, { "epoch": 0.6816708785491303, "grad_norm": 0.357421875, "learning_rate": 8e-05, "loss": 1.1301052570343018, "step": 2866 }, { "epoch": 0.6821465735097368, "grad_norm": 0.37890625, "learning_rate": 8e-05, "loss": 1.2647099494934082, "step": 2868 }, { "epoch": 0.6826222684703434, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.2434825897216797, "step": 2870 }, { "epoch": 0.6830979634309499, "grad_norm": 0.369140625, "learning_rate": 8e-05, "loss": 1.1619213819503784, "step": 2872 }, { "epoch": 0.6835736583915564, "grad_norm": 0.361328125, "learning_rate": 8e-05, "loss": 1.272236943244934, "step": 2874 }, { "epoch": 0.6840493533521629, "grad_norm": 0.373046875, "learning_rate": 8e-05, "loss": 1.2712485790252686, "step": 2876 }, { "epoch": 0.6845250483127694, "grad_norm": 0.380859375, "learning_rate": 8e-05, "loss": 1.2562975883483887, "step": 2878 }, { "epoch": 0.6850007432733759, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.2557085752487183, "step": 2880 }, { "epoch": 0.6854764382339824, "grad_norm": 0.400390625, "learning_rate": 8e-05, "loss": 1.2001773118972778, "step": 2882 }, { "epoch": 0.6859521331945889, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.2661209106445312, "step": 2884 }, { "epoch": 0.6864278281551954, "grad_norm": 0.392578125, "learning_rate": 8e-05, "loss": 1.2700567245483398, "step": 2886 }, { "epoch": 0.6869035231158019, "grad_norm": 0.40234375, "learning_rate": 8e-05, "loss": 1.201700210571289, "step": 2888 }, { "epoch": 0.6873792180764084, "grad_norm": 0.423828125, "learning_rate": 8e-05, "loss": 1.2309627532958984, "step": 2890 }, { "epoch": 0.687854913037015, "grad_norm": 0.455078125, "learning_rate": 8e-05, "loss": 1.2442858219146729, "step": 2892 }, { "epoch": 0.6883306079976216, "grad_norm": 0.416015625, "learning_rate": 8e-05, "loss": 1.2312313318252563, "step": 2894 }, { "epoch": 0.6888063029582281, "grad_norm": 0.384765625, "learning_rate": 8e-05, "loss": 1.168707013130188, "step": 2896 }, { "epoch": 0.6892819979188346, "grad_norm": 0.39453125, "learning_rate": 8e-05, "loss": 1.2480907440185547, "step": 2898 }, { "epoch": 0.6897576928794411, "grad_norm": 0.36328125, "learning_rate": 8e-05, "loss": 1.2292897701263428, "step": 2900 }, { "epoch": 0.6902333878400476, "grad_norm": 0.373046875, "learning_rate": 8e-05, "loss": 1.2023284435272217, "step": 2902 }, { "epoch": 0.6907090828006541, "grad_norm": 0.3671875, "learning_rate": 8e-05, "loss": 1.2282081842422485, "step": 2904 }, { "epoch": 0.6911847777612606, "grad_norm": 0.408203125, "learning_rate": 8e-05, "loss": 1.2390121221542358, "step": 2906 }, { "epoch": 0.6916604727218671, "grad_norm": 0.38671875, "learning_rate": 8e-05, "loss": 1.255518913269043, "step": 2908 }, { "epoch": 0.6921361676824737, "grad_norm": 0.365234375, "learning_rate": 8e-05, "loss": 1.1897988319396973, "step": 2910 }, { "epoch": 0.6926118626430802, "grad_norm": 0.376953125, "learning_rate": 8e-05, "loss": 1.1889443397521973, "step": 2912 }, { "epoch": 0.6930875576036867, "grad_norm": 0.373046875, "learning_rate": 8e-05, "loss": 1.2725920677185059, "step": 2914 }, { "epoch": 0.6935632525642932, "grad_norm": 0.345703125, "learning_rate": 8e-05, "loss": 1.2496650218963623, "step": 2916 }, { "epoch": 0.6940389475248997, "grad_norm": 0.35546875, "learning_rate": 8e-05, "loss": 1.1894876956939697, "step": 2918 }, { "epoch": 0.6945146424855062, "grad_norm": 0.361328125, "learning_rate": 8e-05, "loss": 1.2089958190917969, "step": 2920 }, { "epoch": 0.6949903374461127, "grad_norm": 0.3671875, "learning_rate": 8e-05, "loss": 1.2501626014709473, "step": 2922 }, { "epoch": 0.6954660324067192, "grad_norm": 0.365234375, "learning_rate": 8e-05, "loss": 1.221423625946045, "step": 2924 }, { "epoch": 0.6959417273673257, "grad_norm": 0.375, "learning_rate": 8e-05, "loss": 1.2127522230148315, "step": 2926 }, { "epoch": 0.6964174223279322, "grad_norm": 0.3984375, "learning_rate": 8e-05, "loss": 1.2586814165115356, "step": 2928 }, { "epoch": 0.6968931172885388, "grad_norm": 0.365234375, "learning_rate": 8e-05, "loss": 1.2253403663635254, "step": 2930 }, { "epoch": 0.6973688122491453, "grad_norm": 0.357421875, "learning_rate": 8e-05, "loss": 1.2009187936782837, "step": 2932 }, { "epoch": 0.6978445072097518, "grad_norm": 0.345703125, "learning_rate": 8e-05, "loss": 1.179222583770752, "step": 2934 }, { "epoch": 0.6983202021703583, "grad_norm": 0.36328125, "learning_rate": 8e-05, "loss": 1.1949589252471924, "step": 2936 }, { "epoch": 0.6987958971309648, "grad_norm": 0.365234375, "learning_rate": 8e-05, "loss": 1.2387232780456543, "step": 2938 }, { "epoch": 0.6992715920915713, "grad_norm": 0.36328125, "learning_rate": 8e-05, "loss": 1.184262990951538, "step": 2940 }, { "epoch": 0.6997472870521778, "grad_norm": 0.35546875, "learning_rate": 8e-05, "loss": 1.1451635360717773, "step": 2942 }, { "epoch": 0.7002229820127843, "grad_norm": 0.41796875, "learning_rate": 8e-05, "loss": 1.2731480598449707, "step": 2944 }, { "epoch": 0.7006986769733908, "grad_norm": 0.431640625, "learning_rate": 7.999950424154985e-05, "loss": 1.2006233930587769, "step": 2946 }, { "epoch": 0.7011743719339973, "grad_norm": 0.408203125, "learning_rate": 7.999801697848817e-05, "loss": 1.2164214849472046, "step": 2948 }, { "epoch": 0.7016500668946039, "grad_norm": 0.390625, "learning_rate": 7.999553824768115e-05, "loss": 1.2032701969146729, "step": 2950 }, { "epoch": 0.7021257618552104, "grad_norm": 0.3671875, "learning_rate": 7.999206811057136e-05, "loss": 1.184319257736206, "step": 2952 }, { "epoch": 0.7026014568158169, "grad_norm": 0.388671875, "learning_rate": 7.998760665317632e-05, "loss": 1.1767771244049072, "step": 2954 }, { "epoch": 0.7030771517764234, "grad_norm": 0.365234375, "learning_rate": 7.998215398608625e-05, "loss": 1.1959552764892578, "step": 2956 }, { "epoch": 0.7035528467370299, "grad_norm": 0.353515625, "learning_rate": 7.997571024446146e-05, "loss": 1.1779606342315674, "step": 2958 }, { "epoch": 0.7040285416976364, "grad_norm": 0.37109375, "learning_rate": 7.996827558802894e-05, "loss": 1.1682159900665283, "step": 2960 }, { "epoch": 0.7045042366582429, "grad_norm": 0.375, "learning_rate": 7.995985020107833e-05, "loss": 1.181810736656189, "step": 2962 }, { "epoch": 0.7049799316188494, "grad_norm": 0.345703125, "learning_rate": 7.995043429245751e-05, "loss": 1.2362987995147705, "step": 2964 }, { "epoch": 0.7054556265794559, "grad_norm": 0.353515625, "learning_rate": 7.994002809556727e-05, "loss": 1.1899755001068115, "step": 2966 }, { "epoch": 0.7059313215400624, "grad_norm": 0.369140625, "learning_rate": 7.992863186835562e-05, "loss": 1.223832607269287, "step": 2968 }, { "epoch": 0.706407016500669, "grad_norm": 0.375, "learning_rate": 7.991624589331135e-05, "loss": 1.2033984661102295, "step": 2970 }, { "epoch": 0.7068827114612755, "grad_norm": 0.3984375, "learning_rate": 7.990287047745706e-05, "loss": 1.2263352870941162, "step": 2972 }, { "epoch": 0.707358406421882, "grad_norm": 0.36328125, "learning_rate": 7.988850595234152e-05, "loss": 1.204215168952942, "step": 2974 }, { "epoch": 0.7078341013824885, "grad_norm": 0.359375, "learning_rate": 7.987315267403146e-05, "loss": 1.2107601165771484, "step": 2976 }, { "epoch": 0.708309796343095, "grad_norm": 0.376953125, "learning_rate": 7.985681102310276e-05, "loss": 1.2664358615875244, "step": 2978 }, { "epoch": 0.7087854913037015, "grad_norm": 0.33984375, "learning_rate": 7.983948140463098e-05, "loss": 1.1956796646118164, "step": 2980 }, { "epoch": 0.709261186264308, "grad_norm": 0.369140625, "learning_rate": 7.982116424818139e-05, "loss": 1.2163138389587402, "step": 2982 }, { "epoch": 0.7097368812249145, "grad_norm": 0.34375, "learning_rate": 7.980186000779822e-05, "loss": 1.1702892780303955, "step": 2984 }, { "epoch": 0.710212576185521, "grad_norm": 0.3671875, "learning_rate": 7.978156916199348e-05, "loss": 1.2452645301818848, "step": 2986 }, { "epoch": 0.7106882711461275, "grad_norm": 0.376953125, "learning_rate": 7.976029221373511e-05, "loss": 1.1621694564819336, "step": 2988 }, { "epoch": 0.711163966106734, "grad_norm": 0.353515625, "learning_rate": 7.973802969043444e-05, "loss": 1.2300595045089722, "step": 2990 }, { "epoch": 0.7116396610673406, "grad_norm": 0.3515625, "learning_rate": 7.971478214393316e-05, "loss": 1.1861531734466553, "step": 2992 }, { "epoch": 0.7121153560279471, "grad_norm": 0.359375, "learning_rate": 7.969055015048968e-05, "loss": 1.2321807146072388, "step": 2994 }, { "epoch": 0.7125910509885536, "grad_norm": 0.3828125, "learning_rate": 7.966533431076474e-05, "loss": 1.197440266609192, "step": 2996 }, { "epoch": 0.7130667459491601, "grad_norm": 0.353515625, "learning_rate": 7.963913524980666e-05, "loss": 1.1787972450256348, "step": 2998 }, { "epoch": 0.7135424409097666, "grad_norm": 0.353515625, "learning_rate": 7.961195361703569e-05, "loss": 1.2083191871643066, "step": 3000 }, { "epoch": 0.7140181358703731, "grad_norm": 0.38671875, "learning_rate": 7.958379008622808e-05, "loss": 1.178969144821167, "step": 3002 }, { "epoch": 0.7144938308309796, "grad_norm": 0.37890625, "learning_rate": 7.955464535549922e-05, "loss": 1.2047157287597656, "step": 3004 }, { "epoch": 0.7149695257915861, "grad_norm": 0.375, "learning_rate": 7.952452014728645e-05, "loss": 1.1746503114700317, "step": 3006 }, { "epoch": 0.7154452207521926, "grad_norm": 0.38671875, "learning_rate": 7.949341520833109e-05, "loss": 1.1968495845794678, "step": 3008 }, { "epoch": 0.7159209157127991, "grad_norm": 0.34765625, "learning_rate": 7.946133130965995e-05, "loss": 1.1814994812011719, "step": 3010 }, { "epoch": 0.7163966106734057, "grad_norm": 0.373046875, "learning_rate": 7.942826924656624e-05, "loss": 1.2259728908538818, "step": 3012 }, { "epoch": 0.7168723056340122, "grad_norm": 0.3828125, "learning_rate": 7.939422983858982e-05, "loss": 1.2128264904022217, "step": 3014 }, { "epoch": 0.7173480005946187, "grad_norm": 0.353515625, "learning_rate": 7.935921392949688e-05, "loss": 1.1720407009124756, "step": 3016 }, { "epoch": 0.7178236955552252, "grad_norm": 0.357421875, "learning_rate": 7.932322238725907e-05, "loss": 1.187741994857788, "step": 3018 }, { "epoch": 0.7182993905158317, "grad_norm": 0.359375, "learning_rate": 7.928625610403196e-05, "loss": 1.2031012773513794, "step": 3020 }, { "epoch": 0.7187750854764382, "grad_norm": 0.36328125, "learning_rate": 7.924831599613289e-05, "loss": 1.2213904857635498, "step": 3022 }, { "epoch": 0.7192507804370447, "grad_norm": 0.384765625, "learning_rate": 7.920940300401832e-05, "loss": 1.2365423440933228, "step": 3024 }, { "epoch": 0.7197264753976512, "grad_norm": 0.40234375, "learning_rate": 7.91695180922605e-05, "loss": 1.2173717021942139, "step": 3026 }, { "epoch": 0.7202021703582577, "grad_norm": 0.3828125, "learning_rate": 7.912866224952352e-05, "loss": 1.1911011934280396, "step": 3028 }, { "epoch": 0.7206778653188642, "grad_norm": 0.38671875, "learning_rate": 7.908683648853886e-05, "loss": 1.1721656322479248, "step": 3030 }, { "epoch": 0.7211535602794708, "grad_norm": 0.384765625, "learning_rate": 7.904404184608021e-05, "loss": 1.2273123264312744, "step": 3032 }, { "epoch": 0.7216292552400773, "grad_norm": 0.3671875, "learning_rate": 7.900027938293788e-05, "loss": 1.1623331308364868, "step": 3034 }, { "epoch": 0.7221049502006838, "grad_norm": 0.365234375, "learning_rate": 7.895555018389241e-05, "loss": 1.1802709102630615, "step": 3036 }, { "epoch": 0.7225806451612903, "grad_norm": 0.369140625, "learning_rate": 7.890985535768771e-05, "loss": 1.2304480075836182, "step": 3038 }, { "epoch": 0.7230563401218968, "grad_norm": 0.35546875, "learning_rate": 7.88631960370036e-05, "loss": 1.182260513305664, "step": 3040 }, { "epoch": 0.7235320350825033, "grad_norm": 0.361328125, "learning_rate": 7.881557337842769e-05, "loss": 1.2020962238311768, "step": 3042 }, { "epoch": 0.7240077300431098, "grad_norm": 0.38671875, "learning_rate": 7.876698856242677e-05, "loss": 1.1832443475723267, "step": 3044 }, { "epoch": 0.7244834250037163, "grad_norm": 0.392578125, "learning_rate": 7.871744279331747e-05, "loss": 1.223937749862671, "step": 3046 }, { "epoch": 0.7249591199643228, "grad_norm": 0.412109375, "learning_rate": 7.866693729923651e-05, "loss": 1.2505052089691162, "step": 3048 }, { "epoch": 0.7254348149249293, "grad_norm": 0.39453125, "learning_rate": 7.861547333211014e-05, "loss": 1.2611567974090576, "step": 3050 }, { "epoch": 0.7259105098855358, "grad_norm": 0.41015625, "learning_rate": 7.85630521676232e-05, "loss": 1.2265489101409912, "step": 3052 }, { "epoch": 0.7263862048461424, "grad_norm": 0.37109375, "learning_rate": 7.850967510518743e-05, "loss": 1.2124598026275635, "step": 3054 }, { "epoch": 0.7268618998067489, "grad_norm": 0.3828125, "learning_rate": 7.845534346790934e-05, "loss": 1.1696916818618774, "step": 3056 }, { "epoch": 0.7273375947673555, "grad_norm": 0.390625, "learning_rate": 7.840005860255733e-05, "loss": 1.2019386291503906, "step": 3058 }, { "epoch": 0.727813289727962, "grad_norm": 0.375, "learning_rate": 7.834382187952839e-05, "loss": 1.2763334512710571, "step": 3060 }, { "epoch": 0.7282889846885685, "grad_norm": 0.37890625, "learning_rate": 7.828663469281401e-05, "loss": 1.2215170860290527, "step": 3062 }, { "epoch": 0.728764679649175, "grad_norm": 0.3828125, "learning_rate": 7.822849845996578e-05, "loss": 1.2576022148132324, "step": 3064 }, { "epoch": 0.7292403746097815, "grad_norm": 0.369140625, "learning_rate": 7.81694146220601e-05, "loss": 1.2104671001434326, "step": 3066 }, { "epoch": 0.729716069570388, "grad_norm": 0.369140625, "learning_rate": 7.810938464366258e-05, "loss": 1.2412121295928955, "step": 3068 }, { "epoch": 0.7301917645309945, "grad_norm": 0.365234375, "learning_rate": 7.804841001279169e-05, "loss": 1.2783949375152588, "step": 3070 }, { "epoch": 0.7306674594916011, "grad_norm": 0.359375, "learning_rate": 7.798649224088184e-05, "loss": 1.3144667148590088, "step": 3072 }, { "epoch": 0.7311431544522076, "grad_norm": 0.37109375, "learning_rate": 7.792363286274595e-05, "loss": 1.1726528406143188, "step": 3074 }, { "epoch": 0.7316188494128141, "grad_norm": 0.38671875, "learning_rate": 7.785983343653742e-05, "loss": 1.2941590547561646, "step": 3076 }, { "epoch": 0.7320945443734206, "grad_norm": 0.36328125, "learning_rate": 7.779509554371152e-05, "loss": 1.184098482131958, "step": 3078 }, { "epoch": 0.7325702393340271, "grad_norm": 0.359375, "learning_rate": 7.772942078898607e-05, "loss": 1.202735424041748, "step": 3080 }, { "epoch": 0.7330459342946336, "grad_norm": 0.36328125, "learning_rate": 7.766281080030182e-05, "loss": 1.2427330017089844, "step": 3082 }, { "epoch": 0.7335216292552401, "grad_norm": 0.3828125, "learning_rate": 7.7595267228782e-05, "loss": 1.2848570346832275, "step": 3084 }, { "epoch": 0.7339973242158466, "grad_norm": 0.376953125, "learning_rate": 7.752679174869145e-05, "loss": 1.2101168632507324, "step": 3086 }, { "epoch": 0.7344730191764531, "grad_norm": 0.373046875, "learning_rate": 7.745738605739504e-05, "loss": 1.2171400785446167, "step": 3088 }, { "epoch": 0.7349487141370596, "grad_norm": 0.40234375, "learning_rate": 7.738705187531568e-05, "loss": 1.2718677520751953, "step": 3090 }, { "epoch": 0.7354244090976662, "grad_norm": 0.4140625, "learning_rate": 7.731579094589161e-05, "loss": 1.219995141029358, "step": 3092 }, { "epoch": 0.7359001040582727, "grad_norm": 0.41015625, "learning_rate": 7.724360503553326e-05, "loss": 1.2589280605316162, "step": 3094 }, { "epoch": 0.7363757990188792, "grad_norm": 0.400390625, "learning_rate": 7.717049593357937e-05, "loss": 1.2229852676391602, "step": 3096 }, { "epoch": 0.7368514939794857, "grad_norm": 0.375, "learning_rate": 7.709646545225266e-05, "loss": 1.2284798622131348, "step": 3098 }, { "epoch": 0.7373271889400922, "grad_norm": 0.384765625, "learning_rate": 7.7021515426615e-05, "loss": 1.2732608318328857, "step": 3100 }, { "epoch": 0.7378028839006987, "grad_norm": 0.404296875, "learning_rate": 7.694564771452179e-05, "loss": 1.215606927871704, "step": 3102 }, { "epoch": 0.7382785788613052, "grad_norm": 0.416015625, "learning_rate": 7.686886419657603e-05, "loss": 1.194861650466919, "step": 3104 }, { "epoch": 0.7387542738219117, "grad_norm": 0.3984375, "learning_rate": 7.67911667760816e-05, "loss": 1.1924793720245361, "step": 3106 }, { "epoch": 0.7392299687825182, "grad_norm": 0.3984375, "learning_rate": 7.671255737899613e-05, "loss": 1.196773648262024, "step": 3108 }, { "epoch": 0.7397056637431247, "grad_norm": 0.39453125, "learning_rate": 7.663303795388326e-05, "loss": 1.2454726696014404, "step": 3110 }, { "epoch": 0.7401813587037313, "grad_norm": 0.384765625, "learning_rate": 7.655261047186437e-05, "loss": 1.2148265838623047, "step": 3112 }, { "epoch": 0.7406570536643378, "grad_norm": 0.40234375, "learning_rate": 7.647127692656961e-05, "loss": 1.2220816612243652, "step": 3114 }, { "epoch": 0.7411327486249443, "grad_norm": 0.404296875, "learning_rate": 7.638903933408862e-05, "loss": 1.186138391494751, "step": 3116 }, { "epoch": 0.7416084435855508, "grad_norm": 0.392578125, "learning_rate": 7.630589973292046e-05, "loss": 1.1910457611083984, "step": 3118 }, { "epoch": 0.7420841385461573, "grad_norm": 0.427734375, "learning_rate": 7.622186018392313e-05, "loss": 1.182339072227478, "step": 3120 }, { "epoch": 0.7425598335067638, "grad_norm": 0.39453125, "learning_rate": 7.613692277026247e-05, "loss": 1.2056699991226196, "step": 3122 }, { "epoch": 0.7430355284673703, "grad_norm": 0.3828125, "learning_rate": 7.605108959736048e-05, "loss": 1.228093147277832, "step": 3124 }, { "epoch": 0.7435112234279768, "grad_norm": 0.380859375, "learning_rate": 7.596436279284322e-05, "loss": 1.2399944067001343, "step": 3126 }, { "epoch": 0.7439869183885833, "grad_norm": 0.373046875, "learning_rate": 7.587674450648798e-05, "loss": 1.2229018211364746, "step": 3128 }, { "epoch": 0.7444626133491898, "grad_norm": 0.34375, "learning_rate": 7.578823691017007e-05, "loss": 1.2147870063781738, "step": 3130 }, { "epoch": 0.7449383083097963, "grad_norm": 0.349609375, "learning_rate": 7.569884219780893e-05, "loss": 1.18184494972229, "step": 3132 }, { "epoch": 0.7454140032704029, "grad_norm": 0.36328125, "learning_rate": 7.560856258531374e-05, "loss": 1.2729527950286865, "step": 3134 }, { "epoch": 0.7458896982310094, "grad_norm": 0.369140625, "learning_rate": 7.551740031052857e-05, "loss": 1.2199832201004028, "step": 3136 }, { "epoch": 0.7463653931916159, "grad_norm": 0.384765625, "learning_rate": 7.54253576331768e-05, "loss": 1.2424662113189697, "step": 3138 }, { "epoch": 0.7468410881522224, "grad_norm": 0.3515625, "learning_rate": 7.53324368348052e-05, "loss": 1.1974238157272339, "step": 3140 }, { "epoch": 0.7473167831128289, "grad_norm": 0.365234375, "learning_rate": 7.52386402187273e-05, "loss": 1.2078831195831299, "step": 3142 }, { "epoch": 0.7477924780734354, "grad_norm": 0.36328125, "learning_rate": 7.514397010996637e-05, "loss": 1.2366812229156494, "step": 3144 }, { "epoch": 0.7482681730340419, "grad_norm": 0.37109375, "learning_rate": 7.504842885519771e-05, "loss": 1.2229359149932861, "step": 3146 }, { "epoch": 0.7487438679946484, "grad_norm": 0.37109375, "learning_rate": 7.495201882269055e-05, "loss": 1.2356886863708496, "step": 3148 }, { "epoch": 0.7492195629552549, "grad_norm": 0.36328125, "learning_rate": 7.485474240224932e-05, "loss": 1.2112306356430054, "step": 3150 }, { "epoch": 0.7496952579158614, "grad_norm": 0.375, "learning_rate": 7.475660200515437e-05, "loss": 1.1738417148590088, "step": 3152 }, { "epoch": 0.750170952876468, "grad_norm": 0.376953125, "learning_rate": 7.465760006410228e-05, "loss": 1.197131633758545, "step": 3154 }, { "epoch": 0.7506466478370745, "grad_norm": 0.376953125, "learning_rate": 7.455773903314544e-05, "loss": 1.1941673755645752, "step": 3156 }, { "epoch": 0.751122342797681, "grad_norm": 0.375, "learning_rate": 7.445702138763142e-05, "loss": 1.2553668022155762, "step": 3158 }, { "epoch": 0.7515980377582875, "grad_norm": 0.361328125, "learning_rate": 7.435544962414136e-05, "loss": 1.1946885585784912, "step": 3160 }, { "epoch": 0.752073732718894, "grad_norm": 0.37890625, "learning_rate": 7.425302626042829e-05, "loss": 1.2392586469650269, "step": 3162 }, { "epoch": 0.7525494276795005, "grad_norm": 0.369140625, "learning_rate": 7.41497538353546e-05, "loss": 1.1681256294250488, "step": 3164 }, { "epoch": 0.753025122640107, "grad_norm": 0.3515625, "learning_rate": 7.404563490882917e-05, "loss": 1.1748747825622559, "step": 3166 }, { "epoch": 0.7535008176007135, "grad_norm": 0.369140625, "learning_rate": 7.394067206174386e-05, "loss": 1.1887366771697998, "step": 3168 }, { "epoch": 0.75397651256132, "grad_norm": 0.36328125, "learning_rate": 7.383486789590961e-05, "loss": 1.1796954870224, "step": 3170 }, { "epoch": 0.7544522075219265, "grad_norm": 0.375, "learning_rate": 7.372822503399188e-05, "loss": 1.1664338111877441, "step": 3172 }, { "epoch": 0.754927902482533, "grad_norm": 0.37890625, "learning_rate": 7.362074611944566e-05, "loss": 1.235155463218689, "step": 3174 }, { "epoch": 0.7554035974431396, "grad_norm": 0.353515625, "learning_rate": 7.351243381644998e-05, "loss": 1.1678838729858398, "step": 3176 }, { "epoch": 0.7558792924037461, "grad_norm": 0.3671875, "learning_rate": 7.340329080984177e-05, "loss": 1.2551286220550537, "step": 3178 }, { "epoch": 0.7563549873643526, "grad_norm": 0.361328125, "learning_rate": 7.329331980504947e-05, "loss": 1.200148105621338, "step": 3180 }, { "epoch": 0.7568306823249591, "grad_norm": 0.353515625, "learning_rate": 7.318252352802579e-05, "loss": 1.255072832107544, "step": 3182 }, { "epoch": 0.7573063772855656, "grad_norm": 0.34375, "learning_rate": 7.307090472518026e-05, "loss": 1.1907069683074951, "step": 3184 }, { "epoch": 0.7577820722461721, "grad_norm": 0.3671875, "learning_rate": 7.295846616331113e-05, "loss": 1.202185034751892, "step": 3186 }, { "epoch": 0.7582577672067786, "grad_norm": 0.341796875, "learning_rate": 7.284521062953675e-05, "loss": 1.169918179512024, "step": 3188 }, { "epoch": 0.7587334621673851, "grad_norm": 0.34765625, "learning_rate": 7.27311409312265e-05, "loss": 1.1812635660171509, "step": 3190 }, { "epoch": 0.7592091571279916, "grad_norm": 0.341796875, "learning_rate": 7.261625989593127e-05, "loss": 1.184064507484436, "step": 3192 }, { "epoch": 0.7596848520885982, "grad_norm": 0.359375, "learning_rate": 7.250057037131322e-05, "loss": 1.1607537269592285, "step": 3194 }, { "epoch": 0.7601605470492047, "grad_norm": 0.359375, "learning_rate": 7.238407522507533e-05, "loss": 1.2583791017532349, "step": 3196 }, { "epoch": 0.7606362420098112, "grad_norm": 0.353515625, "learning_rate": 7.226677734489026e-05, "loss": 1.2102004289627075, "step": 3198 }, { "epoch": 0.7611119369704177, "grad_norm": 0.357421875, "learning_rate": 7.214867963832877e-05, "loss": 1.2008968591690063, "step": 3200 }, { "epoch": 0.7615876319310242, "grad_norm": 0.37890625, "learning_rate": 7.202978503278766e-05, "loss": 1.1674326658248901, "step": 3202 }, { "epoch": 0.7620633268916307, "grad_norm": 0.373046875, "learning_rate": 7.191009647541721e-05, "loss": 1.168144941329956, "step": 3204 }, { "epoch": 0.7625390218522372, "grad_norm": 0.345703125, "learning_rate": 7.178961693304809e-05, "loss": 1.1678907871246338, "step": 3206 }, { "epoch": 0.7630147168128437, "grad_norm": 0.345703125, "learning_rate": 7.166834939211786e-05, "loss": 1.1986507177352905, "step": 3208 }, { "epoch": 0.7634904117734502, "grad_norm": 0.32421875, "learning_rate": 7.154629685859694e-05, "loss": 1.1866064071655273, "step": 3210 }, { "epoch": 0.7639661067340567, "grad_norm": 0.34375, "learning_rate": 7.142346235791406e-05, "loss": 1.1903237104415894, "step": 3212 }, { "epoch": 0.7644418016946632, "grad_norm": 0.365234375, "learning_rate": 7.129984893488132e-05, "loss": 1.177189826965332, "step": 3214 }, { "epoch": 0.7649174966552698, "grad_norm": 0.365234375, "learning_rate": 7.117545965361866e-05, "loss": 1.1988158226013184, "step": 3216 }, { "epoch": 0.7653931916158763, "grad_norm": 0.34375, "learning_rate": 7.105029759747794e-05, "loss": 1.1733431816101074, "step": 3218 }, { "epoch": 0.7658688865764828, "grad_norm": 0.3671875, "learning_rate": 7.092436586896653e-05, "loss": 1.287745714187622, "step": 3220 }, { "epoch": 0.7663445815370893, "grad_norm": 0.359375, "learning_rate": 7.079766758967032e-05, "loss": 1.1643383502960205, "step": 3222 }, { "epoch": 0.7668202764976959, "grad_norm": 0.34375, "learning_rate": 7.067020590017648e-05, "loss": 1.1338480710983276, "step": 3224 }, { "epoch": 0.7672959714583024, "grad_norm": 0.359375, "learning_rate": 7.054198395999546e-05, "loss": 1.1828383207321167, "step": 3226 }, { "epoch": 0.7677716664189089, "grad_norm": 0.333984375, "learning_rate": 7.04130049474828e-05, "loss": 1.215213656425476, "step": 3228 }, { "epoch": 0.7682473613795154, "grad_norm": 0.341796875, "learning_rate": 7.028327205976026e-05, "loss": 1.2250659465789795, "step": 3230 }, { "epoch": 0.768723056340122, "grad_norm": 0.3515625, "learning_rate": 7.01527885126366e-05, "loss": 1.2371430397033691, "step": 3232 }, { "epoch": 0.7691987513007285, "grad_norm": 0.349609375, "learning_rate": 7.002155754052789e-05, "loss": 1.202965497970581, "step": 3234 }, { "epoch": 0.769674446261335, "grad_norm": 0.3515625, "learning_rate": 6.988958239637727e-05, "loss": 1.1786177158355713, "step": 3236 }, { "epoch": 0.7701501412219415, "grad_norm": 0.33203125, "learning_rate": 6.975686635157441e-05, "loss": 1.1610124111175537, "step": 3238 }, { "epoch": 0.770625836182548, "grad_norm": 0.33984375, "learning_rate": 6.962341269587436e-05, "loss": 1.2252613306045532, "step": 3240 }, { "epoch": 0.7711015311431545, "grad_norm": 0.365234375, "learning_rate": 6.948922473731594e-05, "loss": 1.2469508647918701, "step": 3242 }, { "epoch": 0.771577226103761, "grad_norm": 0.35546875, "learning_rate": 6.935430580213993e-05, "loss": 1.1859698295593262, "step": 3244 }, { "epoch": 0.7720529210643675, "grad_norm": 0.373046875, "learning_rate": 6.92186592347064e-05, "loss": 1.21319580078125, "step": 3246 }, { "epoch": 0.772528616024974, "grad_norm": 0.359375, "learning_rate": 6.908228839741198e-05, "loss": 1.145960807800293, "step": 3248 }, { "epoch": 0.7730043109855805, "grad_norm": 0.333984375, "learning_rate": 6.894519667060638e-05, "loss": 1.2456450462341309, "step": 3250 }, { "epoch": 0.773480005946187, "grad_norm": 0.359375, "learning_rate": 6.880738745250872e-05, "loss": 1.186368703842163, "step": 3252 }, { "epoch": 0.7739557009067936, "grad_norm": 0.359375, "learning_rate": 6.866886415912325e-05, "loss": 1.185645580291748, "step": 3254 }, { "epoch": 0.7744313958674001, "grad_norm": 0.34765625, "learning_rate": 6.852963022415458e-05, "loss": 1.2109339237213135, "step": 3256 }, { "epoch": 0.7749070908280066, "grad_norm": 0.34375, "learning_rate": 6.838968909892272e-05, "loss": 1.2080646753311157, "step": 3258 }, { "epoch": 0.7753827857886131, "grad_norm": 0.35546875, "learning_rate": 6.824904425227746e-05, "loss": 1.23634934425354, "step": 3260 }, { "epoch": 0.7758584807492196, "grad_norm": 0.33984375, "learning_rate": 6.810769917051233e-05, "loss": 1.1664297580718994, "step": 3262 }, { "epoch": 0.7763341757098261, "grad_norm": 0.341796875, "learning_rate": 6.796565735727829e-05, "loss": 1.176924467086792, "step": 3264 }, { "epoch": 0.7768098706704326, "grad_norm": 0.341796875, "learning_rate": 6.782292233349676e-05, "loss": 1.2261974811553955, "step": 3266 }, { "epoch": 0.7772855656310391, "grad_norm": 0.33203125, "learning_rate": 6.767949763727251e-05, "loss": 1.2133498191833496, "step": 3268 }, { "epoch": 0.7777612605916456, "grad_norm": 0.349609375, "learning_rate": 6.753538682380573e-05, "loss": 1.2278404235839844, "step": 3270 }, { "epoch": 0.7782369555522521, "grad_norm": 0.34375, "learning_rate": 6.739059346530412e-05, "loss": 1.176490306854248, "step": 3272 }, { "epoch": 0.7787126505128587, "grad_norm": 0.33203125, "learning_rate": 6.724512115089426e-05, "loss": 1.223867654800415, "step": 3274 }, { "epoch": 0.7791883454734652, "grad_norm": 0.337890625, "learning_rate": 6.709897348653258e-05, "loss": 1.1769992113113403, "step": 3276 }, { "epoch": 0.7796640404340717, "grad_norm": 0.359375, "learning_rate": 6.695215409491605e-05, "loss": 1.1771578788757324, "step": 3278 }, { "epoch": 0.7801397353946782, "grad_norm": 0.345703125, "learning_rate": 6.68046666153924e-05, "loss": 1.2103866338729858, "step": 3280 }, { "epoch": 0.7806154303552847, "grad_norm": 0.33203125, "learning_rate": 6.66565147038698e-05, "loss": 1.1617302894592285, "step": 3282 }, { "epoch": 0.7810911253158912, "grad_norm": 0.333984375, "learning_rate": 6.65077020327264e-05, "loss": 1.2142266035079956, "step": 3284 }, { "epoch": 0.7815668202764977, "grad_norm": 0.359375, "learning_rate": 6.635823229071915e-05, "loss": 1.2032921314239502, "step": 3286 }, { "epoch": 0.7820425152371042, "grad_norm": 0.361328125, "learning_rate": 6.620810918289241e-05, "loss": 1.1510361433029175, "step": 3288 }, { "epoch": 0.7825182101977107, "grad_norm": 0.3515625, "learning_rate": 6.605733643048615e-05, "loss": 1.209721326828003, "step": 3290 }, { "epoch": 0.7829939051583172, "grad_norm": 0.359375, "learning_rate": 6.590591777084368e-05, "loss": 1.1635715961456299, "step": 3292 }, { "epoch": 0.7834696001189237, "grad_norm": 0.33203125, "learning_rate": 6.575385695731902e-05, "loss": 1.1776684522628784, "step": 3294 }, { "epoch": 0.7839452950795303, "grad_norm": 0.32421875, "learning_rate": 6.560115775918379e-05, "loss": 1.1247327327728271, "step": 3296 }, { "epoch": 0.7844209900401368, "grad_norm": 0.33984375, "learning_rate": 6.544782396153392e-05, "loss": 1.270646572113037, "step": 3298 }, { "epoch": 0.7848966850007433, "grad_norm": 0.345703125, "learning_rate": 6.529385936519568e-05, "loss": 1.1621270179748535, "step": 3300 }, { "epoch": 0.7853723799613498, "grad_norm": 0.341796875, "learning_rate": 6.513926778663156e-05, "loss": 1.1540793180465698, "step": 3302 }, { "epoch": 0.7858480749219563, "grad_norm": 0.337890625, "learning_rate": 6.498405305784562e-05, "loss": 1.1824688911437988, "step": 3304 }, { "epoch": 0.7863237698825628, "grad_norm": 0.34375, "learning_rate": 6.482821902628857e-05, "loss": 1.182361125946045, "step": 3306 }, { "epoch": 0.7867994648431693, "grad_norm": 0.353515625, "learning_rate": 6.467176955476224e-05, "loss": 1.2419183254241943, "step": 3308 }, { "epoch": 0.7872751598037758, "grad_norm": 0.326171875, "learning_rate": 6.451470852132409e-05, "loss": 1.198357105255127, "step": 3310 }, { "epoch": 0.7877508547643823, "grad_norm": 0.36328125, "learning_rate": 6.435703981919077e-05, "loss": 1.1796178817749023, "step": 3312 }, { "epoch": 0.7882265497249888, "grad_norm": 0.3359375, "learning_rate": 6.419876735664188e-05, "loss": 1.1940312385559082, "step": 3314 }, { "epoch": 0.7887022446855954, "grad_norm": 0.3359375, "learning_rate": 6.403989505692296e-05, "loss": 1.1873643398284912, "step": 3316 }, { "epoch": 0.7891779396462019, "grad_norm": 0.341796875, "learning_rate": 6.388042685814827e-05, "loss": 1.1884150505065918, "step": 3318 }, { "epoch": 0.7896536346068084, "grad_norm": 0.345703125, "learning_rate": 6.372036671320315e-05, "loss": 1.1984798908233643, "step": 3320 }, { "epoch": 0.7901293295674149, "grad_norm": 0.326171875, "learning_rate": 6.355971858964607e-05, "loss": 1.191229224205017, "step": 3322 }, { "epoch": 0.7906050245280214, "grad_norm": 0.34375, "learning_rate": 6.339848646961029e-05, "loss": 1.1361331939697266, "step": 3324 }, { "epoch": 0.7910807194886279, "grad_norm": 0.349609375, "learning_rate": 6.323667434970508e-05, "loss": 1.2309892177581787, "step": 3326 }, { "epoch": 0.7915564144492344, "grad_norm": 0.349609375, "learning_rate": 6.307428624091674e-05, "loss": 1.1435422897338867, "step": 3328 }, { "epoch": 0.7920321094098409, "grad_norm": 0.373046875, "learning_rate": 6.291132616850912e-05, "loss": 1.181205153465271, "step": 3330 }, { "epoch": 0.7925078043704474, "grad_norm": 0.37109375, "learning_rate": 6.274779817192389e-05, "loss": 1.1939911842346191, "step": 3332 }, { "epoch": 0.7929834993310539, "grad_norm": 0.341796875, "learning_rate": 6.258370630468032e-05, "loss": 1.2611286640167236, "step": 3334 }, { "epoch": 0.7934591942916605, "grad_norm": 0.33984375, "learning_rate": 6.241905463427493e-05, "loss": 1.1541907787322998, "step": 3336 }, { "epoch": 0.793934889252267, "grad_norm": 0.33984375, "learning_rate": 6.225384724208056e-05, "loss": 1.2033154964447021, "step": 3338 }, { "epoch": 0.7944105842128735, "grad_norm": 0.349609375, "learning_rate": 6.208808822324524e-05, "loss": 1.191408634185791, "step": 3340 }, { "epoch": 0.79488627917348, "grad_norm": 0.36328125, "learning_rate": 6.192178168659069e-05, "loss": 1.1633325815200806, "step": 3342 }, { "epoch": 0.7953619741340865, "grad_norm": 0.3359375, "learning_rate": 6.175493175451045e-05, "loss": 1.130890965461731, "step": 3344 }, { "epoch": 0.795837669094693, "grad_norm": 0.34765625, "learning_rate": 6.15875425628677e-05, "loss": 1.2087476253509521, "step": 3346 }, { "epoch": 0.7963133640552995, "grad_norm": 0.34765625, "learning_rate": 6.141961826089276e-05, "loss": 1.2083730697631836, "step": 3348 }, { "epoch": 0.796789059015906, "grad_norm": 0.341796875, "learning_rate": 6.125116301108021e-05, "loss": 1.1795260906219482, "step": 3350 }, { "epoch": 0.7972647539765125, "grad_norm": 0.37109375, "learning_rate": 6.108218098908573e-05, "loss": 1.160348892211914, "step": 3352 }, { "epoch": 0.797740448937119, "grad_norm": 0.404296875, "learning_rate": 6.0912676383622595e-05, "loss": 1.2218070030212402, "step": 3354 }, { "epoch": 0.7982161438977255, "grad_norm": 0.380859375, "learning_rate": 6.074265339635782e-05, "loss": 1.2201728820800781, "step": 3356 }, { "epoch": 0.7986918388583321, "grad_norm": 0.345703125, "learning_rate": 6.057211624180803e-05, "loss": 1.2353184223175049, "step": 3358 }, { "epoch": 0.7991675338189386, "grad_norm": 0.330078125, "learning_rate": 6.0401069147235016e-05, "loss": 1.199735403060913, "step": 3360 }, { "epoch": 0.7996432287795451, "grad_norm": 0.33984375, "learning_rate": 6.02295163525409e-05, "loss": 1.1990015506744385, "step": 3362 }, { "epoch": 0.8001189237401516, "grad_norm": 0.36328125, "learning_rate": 6.0057462110163054e-05, "loss": 1.2302005290985107, "step": 3364 }, { "epoch": 0.8005946187007581, "grad_norm": 0.345703125, "learning_rate": 5.9884910684968704e-05, "loss": 1.1892058849334717, "step": 3366 }, { "epoch": 0.8010703136613646, "grad_norm": 0.384765625, "learning_rate": 5.9711866354149205e-05, "loss": 1.1621990203857422, "step": 3368 }, { "epoch": 0.8015460086219711, "grad_norm": 0.3515625, "learning_rate": 5.953833340711404e-05, "loss": 1.191482663154602, "step": 3370 }, { "epoch": 0.8020217035825776, "grad_norm": 0.337890625, "learning_rate": 5.9364316145384424e-05, "loss": 1.2096929550170898, "step": 3372 }, { "epoch": 0.8024973985431841, "grad_norm": 0.34375, "learning_rate": 5.918981888248679e-05, "loss": 1.1668099164962769, "step": 3374 }, { "epoch": 0.8029730935037906, "grad_norm": 0.318359375, "learning_rate": 5.901484594384574e-05, "loss": 1.2103668451309204, "step": 3376 }, { "epoch": 0.8034487884643972, "grad_norm": 0.349609375, "learning_rate": 5.883940166667692e-05, "loss": 1.208052396774292, "step": 3378 }, { "epoch": 0.8039244834250037, "grad_norm": 0.349609375, "learning_rate": 5.866349039987949e-05, "loss": 1.1915090084075928, "step": 3380 }, { "epoch": 0.8044001783856102, "grad_norm": 0.3359375, "learning_rate": 5.8487116503928294e-05, "loss": 1.1815118789672852, "step": 3382 }, { "epoch": 0.8048758733462167, "grad_norm": 0.337890625, "learning_rate": 5.8310284350765796e-05, "loss": 1.1728663444519043, "step": 3384 }, { "epoch": 0.8053515683068232, "grad_norm": 0.330078125, "learning_rate": 5.813299832369371e-05, "loss": 1.1404354572296143, "step": 3386 }, { "epoch": 0.8058272632674297, "grad_norm": 0.3515625, "learning_rate": 5.7955262817264333e-05, "loss": 1.2187399864196777, "step": 3388 }, { "epoch": 0.8063029582280363, "grad_norm": 0.35546875, "learning_rate": 5.777708223717162e-05, "loss": 1.1979572772979736, "step": 3390 }, { "epoch": 0.8067786531886428, "grad_norm": 0.333984375, "learning_rate": 5.7598461000142e-05, "loss": 1.181311011314392, "step": 3392 }, { "epoch": 0.8072543481492493, "grad_norm": 0.333984375, "learning_rate": 5.7419403533824825e-05, "loss": 1.1990816593170166, "step": 3394 }, { "epoch": 0.8077300431098559, "grad_norm": 0.333984375, "learning_rate": 5.7239914276682735e-05, "loss": 1.176539659500122, "step": 3396 }, { "epoch": 0.8082057380704624, "grad_norm": 0.326171875, "learning_rate": 5.7059997677881495e-05, "loss": 1.1944094896316528, "step": 3398 }, { "epoch": 0.8086814330310689, "grad_norm": 0.322265625, "learning_rate": 5.687965819717982e-05, "loss": 1.1964213848114014, "step": 3400 }, { "epoch": 0.8091571279916754, "grad_norm": 0.3359375, "learning_rate": 5.66989003048188e-05, "loss": 1.1884610652923584, "step": 3402 }, { "epoch": 0.8096328229522819, "grad_norm": 0.337890625, "learning_rate": 5.651772848141104e-05, "loss": 1.2424553632736206, "step": 3404 }, { "epoch": 0.8101085179128884, "grad_norm": 0.32421875, "learning_rate": 5.633614721782968e-05, "loss": 1.1997463703155518, "step": 3406 }, { "epoch": 0.8105842128734949, "grad_norm": 0.3203125, "learning_rate": 5.6154161015096985e-05, "loss": 1.1804287433624268, "step": 3408 }, { "epoch": 0.8110599078341014, "grad_norm": 0.333984375, "learning_rate": 5.5971774384272875e-05, "loss": 1.2427394390106201, "step": 3410 }, { "epoch": 0.8115356027947079, "grad_norm": 0.326171875, "learning_rate": 5.5788991846343e-05, "loss": 1.2132554054260254, "step": 3412 }, { "epoch": 0.8120112977553144, "grad_norm": 0.337890625, "learning_rate": 5.5605817932106757e-05, "loss": 1.2068378925323486, "step": 3414 }, { "epoch": 0.812486992715921, "grad_norm": 0.3203125, "learning_rate": 5.542225718206494e-05, "loss": 1.2010424137115479, "step": 3416 }, { "epoch": 0.8129626876765275, "grad_norm": 0.32421875, "learning_rate": 5.523831414630719e-05, "loss": 1.1713800430297852, "step": 3418 }, { "epoch": 0.813438382637134, "grad_norm": 0.330078125, "learning_rate": 5.505399338439922e-05, "loss": 1.1795239448547363, "step": 3420 }, { "epoch": 0.8139140775977405, "grad_norm": 0.337890625, "learning_rate": 5.48692994652698e-05, "loss": 1.2366602420806885, "step": 3422 }, { "epoch": 0.814389772558347, "grad_norm": 0.3515625, "learning_rate": 5.4684236967097475e-05, "loss": 1.178973913192749, "step": 3424 }, { "epoch": 0.8148654675189535, "grad_norm": 0.341796875, "learning_rate": 5.449881047719713e-05, "loss": 1.186044454574585, "step": 3426 }, { "epoch": 0.81534116247956, "grad_norm": 0.3359375, "learning_rate": 5.431302459190621e-05, "loss": 1.2068400382995605, "step": 3428 }, { "epoch": 0.8158168574401665, "grad_norm": 0.32421875, "learning_rate": 5.412688391647084e-05, "loss": 1.155308723449707, "step": 3430 }, { "epoch": 0.816292552400773, "grad_norm": 0.3203125, "learning_rate": 5.394039306493167e-05, "loss": 1.1697208881378174, "step": 3432 }, { "epoch": 0.8167682473613795, "grad_norm": 0.337890625, "learning_rate": 5.3753556660009475e-05, "loss": 1.160557746887207, "step": 3434 }, { "epoch": 0.817243942321986, "grad_norm": 0.34375, "learning_rate": 5.356637933299057e-05, "loss": 1.1798973083496094, "step": 3436 }, { "epoch": 0.8177196372825926, "grad_norm": 0.330078125, "learning_rate": 5.337886572361205e-05, "loss": 1.1533775329589844, "step": 3438 }, { "epoch": 0.8181953322431991, "grad_norm": 0.322265625, "learning_rate": 5.319102047994672e-05, "loss": 1.1831254959106445, "step": 3440 }, { "epoch": 0.8186710272038056, "grad_norm": 0.322265625, "learning_rate": 5.300284825828793e-05, "loss": 1.1955242156982422, "step": 3442 }, { "epoch": 0.8191467221644121, "grad_norm": 0.326171875, "learning_rate": 5.2814353723034126e-05, "loss": 1.188542127609253, "step": 3444 }, { "epoch": 0.8196224171250186, "grad_norm": 0.326171875, "learning_rate": 5.262554154657324e-05, "loss": 1.2146074771881104, "step": 3446 }, { "epoch": 0.8200981120856251, "grad_norm": 0.3046875, "learning_rate": 5.2436416409166884e-05, "loss": 1.1553959846496582, "step": 3448 }, { "epoch": 0.8205738070462316, "grad_norm": 0.3359375, "learning_rate": 5.2246982998834276e-05, "loss": 1.1827256679534912, "step": 3450 }, { "epoch": 0.8210495020068381, "grad_norm": 0.3125, "learning_rate": 5.205724601123614e-05, "loss": 1.1618741750717163, "step": 3452 }, { "epoch": 0.8215251969674446, "grad_norm": 0.337890625, "learning_rate": 5.186721014955822e-05, "loss": 1.2132587432861328, "step": 3454 }, { "epoch": 0.8220008919280511, "grad_norm": 0.33203125, "learning_rate": 5.167688012439472e-05, "loss": 1.1444640159606934, "step": 3456 }, { "epoch": 0.8224765868886577, "grad_norm": 0.328125, "learning_rate": 5.1486260653631554e-05, "loss": 1.1991591453552246, "step": 3458 }, { "epoch": 0.8229522818492642, "grad_norm": 0.33203125, "learning_rate": 5.129535646232941e-05, "loss": 1.1526660919189453, "step": 3460 }, { "epoch": 0.8234279768098707, "grad_norm": 0.3359375, "learning_rate": 5.110417228260657e-05, "loss": 1.1603717803955078, "step": 3462 }, { "epoch": 0.8239036717704772, "grad_norm": 0.333984375, "learning_rate": 5.091271285352167e-05, "loss": 1.154017448425293, "step": 3464 }, { "epoch": 0.8243793667310837, "grad_norm": 0.3359375, "learning_rate": 5.07209829209562e-05, "loss": 1.1853911876678467, "step": 3466 }, { "epoch": 0.8248550616916902, "grad_norm": 0.357421875, "learning_rate": 5.0528987237496866e-05, "loss": 1.2097725868225098, "step": 3468 }, { "epoch": 0.8253307566522967, "grad_norm": 0.3515625, "learning_rate": 5.033673056231781e-05, "loss": 1.200005054473877, "step": 3470 }, { "epoch": 0.8258064516129032, "grad_norm": 0.326171875, "learning_rate": 5.0144217661062574e-05, "loss": 1.2073945999145508, "step": 3472 }, { "epoch": 0.8262821465735097, "grad_norm": 0.32421875, "learning_rate": 4.9951453305726055e-05, "loss": 1.1431573629379272, "step": 3474 }, { "epoch": 0.8267578415341162, "grad_norm": 0.33984375, "learning_rate": 4.975844227453615e-05, "loss": 1.2093596458435059, "step": 3476 }, { "epoch": 0.8272335364947228, "grad_norm": 0.328125, "learning_rate": 4.9565189351835336e-05, "loss": 1.1971302032470703, "step": 3478 }, { "epoch": 0.8277092314553293, "grad_norm": 0.326171875, "learning_rate": 4.93716993279621e-05, "loss": 1.1951606273651123, "step": 3480 }, { "epoch": 0.8281849264159358, "grad_norm": 0.3359375, "learning_rate": 4.917797699913215e-05, "loss": 1.1961910724639893, "step": 3482 }, { "epoch": 0.8286606213765423, "grad_norm": 0.33984375, "learning_rate": 4.8984027167319566e-05, "loss": 1.1884233951568604, "step": 3484 }, { "epoch": 0.8291363163371488, "grad_norm": 0.3203125, "learning_rate": 4.8789854640137736e-05, "loss": 1.1898481845855713, "step": 3486 }, { "epoch": 0.8296120112977553, "grad_norm": 0.314453125, "learning_rate": 4.859546423072023e-05, "loss": 1.1624311208724976, "step": 3488 }, { "epoch": 0.8300877062583618, "grad_norm": 0.3359375, "learning_rate": 4.840086075760146e-05, "loss": 1.1634624004364014, "step": 3490 }, { "epoch": 0.8305634012189683, "grad_norm": 0.306640625, "learning_rate": 4.820604904459722e-05, "loss": 1.1898113489151, "step": 3492 }, { "epoch": 0.8310390961795748, "grad_norm": 0.318359375, "learning_rate": 4.801103392068516e-05, "loss": 1.2224345207214355, "step": 3494 }, { "epoch": 0.8315147911401813, "grad_norm": 0.30859375, "learning_rate": 4.781582021988507e-05, "loss": 1.1299514770507812, "step": 3496 }, { "epoch": 0.8319904861007879, "grad_norm": 0.322265625, "learning_rate": 4.762041278113902e-05, "loss": 1.2070683240890503, "step": 3498 }, { "epoch": 0.8324661810613944, "grad_norm": 0.32421875, "learning_rate": 4.742481644819148e-05, "loss": 1.1668651103973389, "step": 3500 }, { "epoch": 0.8329418760220009, "grad_norm": 0.314453125, "learning_rate": 4.7229036069469193e-05, "loss": 1.1788852214813232, "step": 3502 }, { "epoch": 0.8334175709826074, "grad_norm": 0.310546875, "learning_rate": 4.703307649796099e-05, "loss": 1.128293752670288, "step": 3504 }, { "epoch": 0.8338932659432139, "grad_norm": 0.33203125, "learning_rate": 4.683694259109757e-05, "loss": 1.1507880687713623, "step": 3506 }, { "epoch": 0.8343689609038204, "grad_norm": 0.314453125, "learning_rate": 4.664063921063101e-05, "loss": 1.1574411392211914, "step": 3508 }, { "epoch": 0.8348446558644269, "grad_norm": 0.330078125, "learning_rate": 4.644417122251428e-05, "loss": 1.1994435787200928, "step": 3510 }, { "epoch": 0.8353203508250334, "grad_norm": 0.322265625, "learning_rate": 4.6247543496780675e-05, "loss": 1.1481845378875732, "step": 3512 }, { "epoch": 0.8357960457856399, "grad_norm": 0.31640625, "learning_rate": 4.605076090742299e-05, "loss": 1.184557557106018, "step": 3514 }, { "epoch": 0.8362717407462464, "grad_norm": 0.310546875, "learning_rate": 4.585382833227281e-05, "loss": 1.1902873516082764, "step": 3516 }, { "epoch": 0.836747435706853, "grad_norm": 0.32421875, "learning_rate": 4.565675065287956e-05, "loss": 1.2748725414276123, "step": 3518 }, { "epoch": 0.8372231306674595, "grad_norm": 0.322265625, "learning_rate": 4.545953275438947e-05, "loss": 1.1273387670516968, "step": 3520 }, { "epoch": 0.837698825628066, "grad_norm": 0.31640625, "learning_rate": 4.526217952542456e-05, "loss": 1.1241960525512695, "step": 3522 }, { "epoch": 0.8381745205886725, "grad_norm": 0.326171875, "learning_rate": 4.506469585796133e-05, "loss": 1.1555461883544922, "step": 3524 }, { "epoch": 0.838650215549279, "grad_norm": 0.3359375, "learning_rate": 4.486708664720965e-05, "loss": 1.2142927646636963, "step": 3526 }, { "epoch": 0.8391259105098855, "grad_norm": 0.326171875, "learning_rate": 4.466935679149131e-05, "loss": 1.1009758710861206, "step": 3528 }, { "epoch": 0.839601605470492, "grad_norm": 0.3125, "learning_rate": 4.4471511192118666e-05, "loss": 1.1688785552978516, "step": 3530 }, { "epoch": 0.8400773004310985, "grad_norm": 0.33203125, "learning_rate": 4.427355475327309e-05, "loss": 1.1534974575042725, "step": 3532 }, { "epoch": 0.840552995391705, "grad_norm": 0.30859375, "learning_rate": 4.407549238188346e-05, "loss": 1.150222659111023, "step": 3534 }, { "epoch": 0.8410286903523115, "grad_norm": 0.314453125, "learning_rate": 4.387732898750448e-05, "loss": 1.207751750946045, "step": 3536 }, { "epoch": 0.841504385312918, "grad_norm": 0.318359375, "learning_rate": 4.367906948219502e-05, "loss": 1.1927155256271362, "step": 3538 }, { "epoch": 0.8419800802735246, "grad_norm": 0.33984375, "learning_rate": 4.348071878039633e-05, "loss": 1.1655819416046143, "step": 3540 }, { "epoch": 0.8424557752341311, "grad_norm": 0.345703125, "learning_rate": 4.3282281798810256e-05, "loss": 1.1812100410461426, "step": 3542 }, { "epoch": 0.8429314701947376, "grad_norm": 0.3359375, "learning_rate": 4.308376345627728e-05, "loss": 1.2032802104949951, "step": 3544 }, { "epoch": 0.8434071651553441, "grad_norm": 0.322265625, "learning_rate": 4.288516867365474e-05, "loss": 1.1608192920684814, "step": 3546 }, { "epoch": 0.8438828601159506, "grad_norm": 0.322265625, "learning_rate": 4.2686502373694684e-05, "loss": 1.2154037952423096, "step": 3548 }, { "epoch": 0.8443585550765571, "grad_norm": 0.333984375, "learning_rate": 4.248776948092197e-05, "loss": 1.152782917022705, "step": 3550 }, { "epoch": 0.8448342500371636, "grad_norm": 0.318359375, "learning_rate": 4.228897492151213e-05, "loss": 1.176882028579712, "step": 3552 }, { "epoch": 0.8453099449977702, "grad_norm": 0.31640625, "learning_rate": 4.209012362316934e-05, "loss": 1.1599602699279785, "step": 3554 }, { "epoch": 0.8457856399583767, "grad_norm": 0.318359375, "learning_rate": 4.1891220515004114e-05, "loss": 1.2112061977386475, "step": 3556 }, { "epoch": 0.8462613349189833, "grad_norm": 0.322265625, "learning_rate": 4.169227052741134e-05, "loss": 1.1296908855438232, "step": 3558 }, { "epoch": 0.8467370298795898, "grad_norm": 0.318359375, "learning_rate": 4.1493278591947855e-05, "loss": 1.1762603521347046, "step": 3560 }, { "epoch": 0.8472127248401963, "grad_norm": 0.3125, "learning_rate": 4.1294249641210354e-05, "loss": 1.2208728790283203, "step": 3562 }, { "epoch": 0.8476884198008028, "grad_norm": 0.326171875, "learning_rate": 4.109518860871305e-05, "loss": 1.2221901416778564, "step": 3564 }, { "epoch": 0.8481641147614093, "grad_norm": 0.32421875, "learning_rate": 4.089610042876537e-05, "loss": 1.1988012790679932, "step": 3566 }, { "epoch": 0.8486398097220158, "grad_norm": 0.310546875, "learning_rate": 4.069699003634972e-05, "loss": 1.1596108675003052, "step": 3568 }, { "epoch": 0.8491155046826223, "grad_norm": 0.3125, "learning_rate": 4.0497862366999034e-05, "loss": 1.1585445404052734, "step": 3570 }, { "epoch": 0.8495911996432288, "grad_norm": 0.3125, "learning_rate": 4.0298722356674584e-05, "loss": 1.1766672134399414, "step": 3572 }, { "epoch": 0.8500668946038353, "grad_norm": 0.32421875, "learning_rate": 4.0099574941643506e-05, "loss": 1.1228039264678955, "step": 3574 }, { "epoch": 0.8505425895644418, "grad_norm": 0.31640625, "learning_rate": 3.990042505835651e-05, "loss": 1.1494994163513184, "step": 3576 }, { "epoch": 0.8510182845250484, "grad_norm": 0.328125, "learning_rate": 3.9701277643325416e-05, "loss": 1.202513575553894, "step": 3578 }, { "epoch": 0.8514939794856549, "grad_norm": 0.314453125, "learning_rate": 3.950213763300097e-05, "loss": 1.179110050201416, "step": 3580 }, { "epoch": 0.8519696744462614, "grad_norm": 0.328125, "learning_rate": 3.9303009963650306e-05, "loss": 1.1852927207946777, "step": 3582 }, { "epoch": 0.8524453694068679, "grad_norm": 0.326171875, "learning_rate": 3.910389957123464e-05, "loss": 1.1301989555358887, "step": 3584 }, { "epoch": 0.8529210643674744, "grad_norm": 0.32421875, "learning_rate": 3.890481139128696e-05, "loss": 1.2232120037078857, "step": 3586 }, { "epoch": 0.8533967593280809, "grad_norm": 0.310546875, "learning_rate": 3.8705750358789646e-05, "loss": 1.1268978118896484, "step": 3588 }, { "epoch": 0.8538724542886874, "grad_norm": 0.32421875, "learning_rate": 3.850672140805216e-05, "loss": 1.2016334533691406, "step": 3590 }, { "epoch": 0.8543481492492939, "grad_norm": 0.310546875, "learning_rate": 3.830772947258869e-05, "loss": 1.2152290344238281, "step": 3592 }, { "epoch": 0.8548238442099004, "grad_norm": 0.32421875, "learning_rate": 3.810877948499589e-05, "loss": 1.209730625152588, "step": 3594 }, { "epoch": 0.8552995391705069, "grad_norm": 0.314453125, "learning_rate": 3.790987637683069e-05, "loss": 1.1957197189331055, "step": 3596 }, { "epoch": 0.8557752341311134, "grad_norm": 0.306640625, "learning_rate": 3.7711025078487876e-05, "loss": 1.1268858909606934, "step": 3598 }, { "epoch": 0.85625092909172, "grad_norm": 0.310546875, "learning_rate": 3.751223051907805e-05, "loss": 1.2362475395202637, "step": 3600 }, { "epoch": 0.8567266240523265, "grad_norm": 0.30859375, "learning_rate": 3.731349762630534e-05, "loss": 1.172964096069336, "step": 3602 }, { "epoch": 0.857202319012933, "grad_norm": 0.310546875, "learning_rate": 3.711483132634527e-05, "loss": 1.2133592367172241, "step": 3604 }, { "epoch": 0.8576780139735395, "grad_norm": 0.30078125, "learning_rate": 3.691623654372272e-05, "loss": 1.1895489692687988, "step": 3606 }, { "epoch": 0.858153708934146, "grad_norm": 0.31640625, "learning_rate": 3.671771820118975e-05, "loss": 1.1736524105072021, "step": 3608 }, { "epoch": 0.8586294038947525, "grad_norm": 0.306640625, "learning_rate": 3.6519281219603675e-05, "loss": 1.1844290494918823, "step": 3610 }, { "epoch": 0.859105098855359, "grad_norm": 0.306640625, "learning_rate": 3.632093051780498e-05, "loss": 1.1735870838165283, "step": 3612 }, { "epoch": 0.8595807938159655, "grad_norm": 0.30078125, "learning_rate": 3.6122671012495524e-05, "loss": 1.1634467840194702, "step": 3614 }, { "epoch": 0.860056488776572, "grad_norm": 0.310546875, "learning_rate": 3.592450761811656e-05, "loss": 1.178370714187622, "step": 3616 }, { "epoch": 0.8605321837371785, "grad_norm": 0.310546875, "learning_rate": 3.5726445246726915e-05, "loss": 1.153395414352417, "step": 3618 }, { "epoch": 0.8610078786977851, "grad_norm": 0.298828125, "learning_rate": 3.5528488807881354e-05, "loss": 1.1781080961227417, "step": 3620 }, { "epoch": 0.8614835736583916, "grad_norm": 0.330078125, "learning_rate": 3.53306432085087e-05, "loss": 1.1583459377288818, "step": 3622 }, { "epoch": 0.8619592686189981, "grad_norm": 0.314453125, "learning_rate": 3.513291335279036e-05, "loss": 1.2509000301361084, "step": 3624 }, { "epoch": 0.8624349635796046, "grad_norm": 0.314453125, "learning_rate": 3.4935304142038686e-05, "loss": 1.1476457118988037, "step": 3626 }, { "epoch": 0.8629106585402111, "grad_norm": 0.3046875, "learning_rate": 3.4737820474575456e-05, "loss": 1.1432411670684814, "step": 3628 }, { "epoch": 0.8633863535008176, "grad_norm": 0.314453125, "learning_rate": 3.4540467245610534e-05, "loss": 1.1552605628967285, "step": 3630 }, { "epoch": 0.8638620484614241, "grad_norm": 0.3203125, "learning_rate": 3.4343249347120445e-05, "loss": 1.2122418880462646, "step": 3632 }, { "epoch": 0.8643377434220306, "grad_norm": 0.302734375, "learning_rate": 3.41461716677272e-05, "loss": 1.1323740482330322, "step": 3634 }, { "epoch": 0.8648134383826371, "grad_norm": 0.328125, "learning_rate": 3.394923909257704e-05, "loss": 1.2014985084533691, "step": 3636 }, { "epoch": 0.8652891333432436, "grad_norm": 0.3125, "learning_rate": 3.375245650321934e-05, "loss": 1.188545823097229, "step": 3638 }, { "epoch": 0.8657648283038502, "grad_norm": 0.318359375, "learning_rate": 3.3555828777485726e-05, "loss": 1.178330898284912, "step": 3640 }, { "epoch": 0.8662405232644567, "grad_norm": 0.326171875, "learning_rate": 3.335936078936899e-05, "loss": 1.1636848449707031, "step": 3642 }, { "epoch": 0.8667162182250632, "grad_norm": 0.310546875, "learning_rate": 3.3163057408902435e-05, "loss": 1.1589958667755127, "step": 3644 }, { "epoch": 0.8671919131856697, "grad_norm": 0.310546875, "learning_rate": 3.296692350203902e-05, "loss": 1.1450896263122559, "step": 3646 }, { "epoch": 0.8676676081462762, "grad_norm": 0.310546875, "learning_rate": 3.277096393053082e-05, "loss": 1.123741626739502, "step": 3648 }, { "epoch": 0.8681433031068827, "grad_norm": 0.3203125, "learning_rate": 3.257518355180853e-05, "loss": 1.187320351600647, "step": 3650 }, { "epoch": 0.8686189980674892, "grad_norm": 0.3125, "learning_rate": 3.2379587218860976e-05, "loss": 1.16719651222229, "step": 3652 }, { "epoch": 0.8690946930280957, "grad_norm": 0.318359375, "learning_rate": 3.2184179780114944e-05, "loss": 1.196395993232727, "step": 3654 }, { "epoch": 0.8695703879887022, "grad_norm": 0.31640625, "learning_rate": 3.198896607931485e-05, "loss": 1.2043986320495605, "step": 3656 }, { "epoch": 0.8700460829493087, "grad_norm": 0.306640625, "learning_rate": 3.179395095540279e-05, "loss": 1.1552737951278687, "step": 3658 }, { "epoch": 0.8705217779099153, "grad_norm": 0.3046875, "learning_rate": 3.1599139242398556e-05, "loss": 1.180349588394165, "step": 3660 }, { "epoch": 0.8709974728705218, "grad_norm": 0.30078125, "learning_rate": 3.1404535769279764e-05, "loss": 1.1361455917358398, "step": 3662 }, { "epoch": 0.8714731678311283, "grad_norm": 0.3046875, "learning_rate": 3.121014535986227e-05, "loss": 1.1576318740844727, "step": 3664 }, { "epoch": 0.8719488627917348, "grad_norm": 0.29296875, "learning_rate": 3.1015972832680454e-05, "loss": 1.083686113357544, "step": 3666 }, { "epoch": 0.8724245577523413, "grad_norm": 0.30859375, "learning_rate": 3.0822023000867863e-05, "loss": 1.1516526937484741, "step": 3668 }, { "epoch": 0.8729002527129478, "grad_norm": 0.296875, "learning_rate": 3.062830067203792e-05, "loss": 1.1149940490722656, "step": 3670 }, { "epoch": 0.8733759476735543, "grad_norm": 0.3125, "learning_rate": 3.043481064816467e-05, "loss": 1.1872518062591553, "step": 3672 }, { "epoch": 0.8738516426341608, "grad_norm": 0.32421875, "learning_rate": 3.0241557725463866e-05, "loss": 1.133741021156311, "step": 3674 }, { "epoch": 0.8743273375947673, "grad_norm": 0.306640625, "learning_rate": 3.0048546694273965e-05, "loss": 1.1402521133422852, "step": 3676 }, { "epoch": 0.8748030325553738, "grad_norm": 0.3125, "learning_rate": 2.9855782338937432e-05, "loss": 1.2263612747192383, "step": 3678 }, { "epoch": 0.8752787275159803, "grad_norm": 0.3125, "learning_rate": 2.9663269437682208e-05, "loss": 1.1547777652740479, "step": 3680 }, { "epoch": 0.8757544224765869, "grad_norm": 0.40625, "learning_rate": 2.9471012762503134e-05, "loss": 1.1414549350738525, "step": 3682 }, { "epoch": 0.8762301174371934, "grad_norm": 0.30859375, "learning_rate": 2.9279017079043816e-05, "loss": 1.206810474395752, "step": 3684 }, { "epoch": 0.8767058123977999, "grad_norm": 0.3046875, "learning_rate": 2.908728714647834e-05, "loss": 1.1493148803710938, "step": 3686 }, { "epoch": 0.8771815073584064, "grad_norm": 0.310546875, "learning_rate": 2.8895827717393446e-05, "loss": 1.1840794086456299, "step": 3688 }, { "epoch": 0.8776572023190129, "grad_norm": 0.310546875, "learning_rate": 2.8704643537670603e-05, "loss": 1.1903091669082642, "step": 3690 }, { "epoch": 0.8781328972796194, "grad_norm": 0.30078125, "learning_rate": 2.8513739346368443e-05, "loss": 1.1483159065246582, "step": 3692 }, { "epoch": 0.8786085922402259, "grad_norm": 0.296875, "learning_rate": 2.8323119875605288e-05, "loss": 1.1400749683380127, "step": 3694 }, { "epoch": 0.8790842872008324, "grad_norm": 0.302734375, "learning_rate": 2.813278985044178e-05, "loss": 1.1304882764816284, "step": 3696 }, { "epoch": 0.8795599821614389, "grad_norm": 0.30078125, "learning_rate": 2.794275398876386e-05, "loss": 1.1478686332702637, "step": 3698 }, { "epoch": 0.8800356771220454, "grad_norm": 0.302734375, "learning_rate": 2.7753017001165737e-05, "loss": 1.1680241823196411, "step": 3700 }, { "epoch": 0.880511372082652, "grad_norm": 0.306640625, "learning_rate": 2.7563583590833133e-05, "loss": 1.1892788410186768, "step": 3702 }, { "epoch": 0.8809870670432585, "grad_norm": 0.306640625, "learning_rate": 2.737445845342677e-05, "loss": 1.1995958089828491, "step": 3704 }, { "epoch": 0.881462762003865, "grad_norm": 0.30078125, "learning_rate": 2.718564627696588e-05, "loss": 1.1075689792633057, "step": 3706 }, { "epoch": 0.8819384569644715, "grad_norm": 0.296875, "learning_rate": 2.6997151741712087e-05, "loss": 1.1438966989517212, "step": 3708 }, { "epoch": 0.882414151925078, "grad_norm": 0.310546875, "learning_rate": 2.680897952005329e-05, "loss": 1.209947109222412, "step": 3710 }, { "epoch": 0.8828898468856845, "grad_norm": 0.29296875, "learning_rate": 2.662113427638796e-05, "loss": 1.116198182106018, "step": 3712 }, { "epoch": 0.883365541846291, "grad_norm": 0.30859375, "learning_rate": 2.6433620667009442e-05, "loss": 1.1661490201950073, "step": 3714 }, { "epoch": 0.8838412368068975, "grad_norm": 0.310546875, "learning_rate": 2.6246443339990532e-05, "loss": 1.1473069190979004, "step": 3716 }, { "epoch": 0.884316931767504, "grad_norm": 0.302734375, "learning_rate": 2.605960693506834e-05, "loss": 1.1723562479019165, "step": 3718 }, { "epoch": 0.8847926267281107, "grad_norm": 0.296875, "learning_rate": 2.5873116083529173e-05, "loss": 1.1769287586212158, "step": 3720 }, { "epoch": 0.8852683216887172, "grad_norm": 0.30859375, "learning_rate": 2.56869754080938e-05, "loss": 1.1576387882232666, "step": 3722 }, { "epoch": 0.8857440166493237, "grad_norm": 0.302734375, "learning_rate": 2.550118952280288e-05, "loss": 1.0645157098770142, "step": 3724 }, { "epoch": 0.8862197116099302, "grad_norm": 0.298828125, "learning_rate": 2.531576303290253e-05, "loss": 1.1478241682052612, "step": 3726 }, { "epoch": 0.8866954065705367, "grad_norm": 0.310546875, "learning_rate": 2.5130700534730215e-05, "loss": 1.1812896728515625, "step": 3728 }, { "epoch": 0.8871711015311432, "grad_norm": 0.314453125, "learning_rate": 2.494600661560079e-05, "loss": 1.223722219467163, "step": 3730 }, { "epoch": 0.8876467964917497, "grad_norm": 0.294921875, "learning_rate": 2.4761685853692825e-05, "loss": 1.1464184522628784, "step": 3732 }, { "epoch": 0.8881224914523562, "grad_norm": 0.302734375, "learning_rate": 2.4577742817935077e-05, "loss": 1.167757511138916, "step": 3734 }, { "epoch": 0.8885981864129627, "grad_norm": 0.30078125, "learning_rate": 2.4394182067893243e-05, "loss": 1.1267993450164795, "step": 3736 }, { "epoch": 0.8890738813735692, "grad_norm": 0.302734375, "learning_rate": 2.421100815365701e-05, "loss": 1.1455817222595215, "step": 3738 }, { "epoch": 0.8895495763341758, "grad_norm": 0.30078125, "learning_rate": 2.4028225615727145e-05, "loss": 1.1717948913574219, "step": 3740 }, { "epoch": 0.8900252712947823, "grad_norm": 0.298828125, "learning_rate": 2.384583898490302e-05, "loss": 1.1518162488937378, "step": 3742 }, { "epoch": 0.8905009662553888, "grad_norm": 0.294921875, "learning_rate": 2.3663852782170336e-05, "loss": 1.147728443145752, "step": 3744 }, { "epoch": 0.8909766612159953, "grad_norm": 0.3125, "learning_rate": 2.3482271518588967e-05, "loss": 1.1500670909881592, "step": 3746 }, { "epoch": 0.8914523561766018, "grad_norm": 0.3046875, "learning_rate": 2.330109969518122e-05, "loss": 1.1796722412109375, "step": 3748 }, { "epoch": 0.8919280511372083, "grad_norm": 0.296875, "learning_rate": 2.3120341802820197e-05, "loss": 1.1131136417388916, "step": 3750 }, { "epoch": 0.8924037460978148, "grad_norm": 0.296875, "learning_rate": 2.2940002322118518e-05, "loss": 1.1635349988937378, "step": 3752 }, { "epoch": 0.8928794410584213, "grad_norm": 0.294921875, "learning_rate": 2.2760085723317285e-05, "loss": 1.1256214380264282, "step": 3754 }, { "epoch": 0.8933551360190278, "grad_norm": 0.302734375, "learning_rate": 2.258059646617517e-05, "loss": 1.1560603380203247, "step": 3756 }, { "epoch": 0.8938308309796343, "grad_norm": 0.294921875, "learning_rate": 2.240153899985802e-05, "loss": 1.186435580253601, "step": 3758 }, { "epoch": 0.8943065259402408, "grad_norm": 0.30078125, "learning_rate": 2.222291776282838e-05, "loss": 1.2056632041931152, "step": 3760 }, { "epoch": 0.8947822209008474, "grad_norm": 0.306640625, "learning_rate": 2.204473718273568e-05, "loss": 1.1999526023864746, "step": 3762 }, { "epoch": 0.8952579158614539, "grad_norm": 0.306640625, "learning_rate": 2.1867001676306306e-05, "loss": 1.209770917892456, "step": 3764 }, { "epoch": 0.8957336108220604, "grad_norm": 0.296875, "learning_rate": 2.1689715649234208e-05, "loss": 1.110062599182129, "step": 3766 }, { "epoch": 0.8962093057826669, "grad_norm": 0.302734375, "learning_rate": 2.1512883496071715e-05, "loss": 1.195483922958374, "step": 3768 }, { "epoch": 0.8966850007432734, "grad_norm": 0.310546875, "learning_rate": 2.1336509600120508e-05, "loss": 1.226474642753601, "step": 3770 }, { "epoch": 0.8971606957038799, "grad_norm": 0.294921875, "learning_rate": 2.1160598333323087e-05, "loss": 1.1339728832244873, "step": 3772 }, { "epoch": 0.8976363906644864, "grad_norm": 0.294921875, "learning_rate": 2.0985154056154274e-05, "loss": 1.2141457796096802, "step": 3774 }, { "epoch": 0.8981120856250929, "grad_norm": 0.3046875, "learning_rate": 2.0810181117513215e-05, "loss": 1.1662113666534424, "step": 3776 }, { "epoch": 0.8985877805856994, "grad_norm": 0.298828125, "learning_rate": 2.0635683854615576e-05, "loss": 1.0973902940750122, "step": 3778 }, { "epoch": 0.899063475546306, "grad_norm": 0.2890625, "learning_rate": 2.0461666592885974e-05, "loss": 1.1171178817749023, "step": 3780 }, { "epoch": 0.8995391705069125, "grad_norm": 0.296875, "learning_rate": 2.0288133645850808e-05, "loss": 1.1062219142913818, "step": 3782 }, { "epoch": 0.900014865467519, "grad_norm": 0.322265625, "learning_rate": 2.0115089315031323e-05, "loss": 1.1549062728881836, "step": 3784 }, { "epoch": 0.9004905604281255, "grad_norm": 0.294921875, "learning_rate": 1.9942537889836963e-05, "loss": 1.1845629215240479, "step": 3786 }, { "epoch": 0.900966255388732, "grad_norm": 0.291015625, "learning_rate": 1.9770483647459117e-05, "loss": 1.1162179708480835, "step": 3788 }, { "epoch": 0.9014419503493385, "grad_norm": 0.302734375, "learning_rate": 1.9598930852764987e-05, "loss": 1.1066762208938599, "step": 3790 }, { "epoch": 0.901917645309945, "grad_norm": 0.28515625, "learning_rate": 1.942788375819198e-05, "loss": 1.1973916292190552, "step": 3792 }, { "epoch": 0.9023933402705515, "grad_norm": 0.302734375, "learning_rate": 1.9257346603642203e-05, "loss": 1.1700313091278076, "step": 3794 }, { "epoch": 0.902869035231158, "grad_norm": 0.298828125, "learning_rate": 1.9087323616377414e-05, "loss": 1.1440091133117676, "step": 3796 }, { "epoch": 0.9033447301917645, "grad_norm": 0.296875, "learning_rate": 1.8917819010914283e-05, "loss": 1.1456643342971802, "step": 3798 }, { "epoch": 0.903820425152371, "grad_norm": 0.30078125, "learning_rate": 1.8748836988919793e-05, "loss": 1.1695044040679932, "step": 3800 }, { "epoch": 0.9042961201129776, "grad_norm": 0.291015625, "learning_rate": 1.8580381739107252e-05, "loss": 1.1730451583862305, "step": 3802 }, { "epoch": 0.9047718150735841, "grad_norm": 0.296875, "learning_rate": 1.8412457437132318e-05, "loss": 1.1789326667785645, "step": 3804 }, { "epoch": 0.9052475100341906, "grad_norm": 0.291015625, "learning_rate": 1.824506824548956e-05, "loss": 1.1460459232330322, "step": 3806 }, { "epoch": 0.9057232049947971, "grad_norm": 0.30859375, "learning_rate": 1.8078218313409324e-05, "loss": 1.1638338565826416, "step": 3808 }, { "epoch": 0.9061988999554036, "grad_norm": 0.291015625, "learning_rate": 1.7911911776754756e-05, "loss": 1.1171094179153442, "step": 3810 }, { "epoch": 0.9066745949160101, "grad_norm": 0.306640625, "learning_rate": 1.7746152757919445e-05, "loss": 1.2183301448822021, "step": 3812 }, { "epoch": 0.9071502898766166, "grad_norm": 0.294921875, "learning_rate": 1.758094536572508e-05, "loss": 1.141022801399231, "step": 3814 }, { "epoch": 0.9076259848372231, "grad_norm": 0.296875, "learning_rate": 1.741629369531968e-05, "loss": 1.1439030170440674, "step": 3816 }, { "epoch": 0.9081016797978296, "grad_norm": 0.2890625, "learning_rate": 1.7252201828076126e-05, "loss": 1.1290979385375977, "step": 3818 }, { "epoch": 0.9085773747584361, "grad_norm": 0.294921875, "learning_rate": 1.7088673831490893e-05, "loss": 1.1221880912780762, "step": 3820 }, { "epoch": 0.9090530697190427, "grad_norm": 0.298828125, "learning_rate": 1.6925713759083282e-05, "loss": 1.1449179649353027, "step": 3822 }, { "epoch": 0.9095287646796492, "grad_norm": 0.294921875, "learning_rate": 1.6763325650294933e-05, "loss": 1.148937702178955, "step": 3824 }, { "epoch": 0.9100044596402557, "grad_norm": 0.29296875, "learning_rate": 1.6601513530389727e-05, "loss": 1.12366783618927, "step": 3826 }, { "epoch": 0.9104801546008622, "grad_norm": 0.298828125, "learning_rate": 1.644028141035394e-05, "loss": 1.12631356716156, "step": 3828 }, { "epoch": 0.9109558495614687, "grad_norm": 0.287109375, "learning_rate": 1.627963328679686e-05, "loss": 1.1116429567337036, "step": 3830 }, { "epoch": 0.9114315445220752, "grad_norm": 0.298828125, "learning_rate": 1.6119573141851747e-05, "loss": 1.1646809577941895, "step": 3832 }, { "epoch": 0.9119072394826817, "grad_norm": 0.29296875, "learning_rate": 1.5960104943077045e-05, "loss": 1.0913721323013306, "step": 3834 }, { "epoch": 0.9123829344432882, "grad_norm": 0.296875, "learning_rate": 1.5801232643358134e-05, "loss": 1.1654855012893677, "step": 3836 }, { "epoch": 0.9128586294038947, "grad_norm": 0.294921875, "learning_rate": 1.5642960180809255e-05, "loss": 1.1685070991516113, "step": 3838 }, { "epoch": 0.9133343243645012, "grad_norm": 0.302734375, "learning_rate": 1.5485291478675928e-05, "loss": 1.1893408298492432, "step": 3840 }, { "epoch": 0.9138100193251077, "grad_norm": 0.298828125, "learning_rate": 1.5328230445237758e-05, "loss": 1.1577904224395752, "step": 3842 }, { "epoch": 0.9142857142857143, "grad_norm": 0.2890625, "learning_rate": 1.517178097371144e-05, "loss": 1.1701260805130005, "step": 3844 }, { "epoch": 0.9147614092463208, "grad_norm": 0.296875, "learning_rate": 1.5015946942154375e-05, "loss": 1.1752269268035889, "step": 3846 }, { "epoch": 0.9152371042069273, "grad_norm": 0.294921875, "learning_rate": 1.4860732213368452e-05, "loss": 1.158857822418213, "step": 3848 }, { "epoch": 0.9157127991675338, "grad_norm": 0.30078125, "learning_rate": 1.4706140634804325e-05, "loss": 1.163185954093933, "step": 3850 }, { "epoch": 0.9161884941281403, "grad_norm": 0.287109375, "learning_rate": 1.455217603846609e-05, "loss": 1.1203261613845825, "step": 3852 }, { "epoch": 0.9166641890887468, "grad_norm": 0.296875, "learning_rate": 1.4398842240816207e-05, "loss": 1.128927230834961, "step": 3854 }, { "epoch": 0.9171398840493533, "grad_norm": 0.291015625, "learning_rate": 1.4246143042680989e-05, "loss": 1.1380681991577148, "step": 3856 }, { "epoch": 0.9176155790099598, "grad_norm": 0.298828125, "learning_rate": 1.4094082229156323e-05, "loss": 1.1902419328689575, "step": 3858 }, { "epoch": 0.9180912739705663, "grad_norm": 0.29296875, "learning_rate": 1.3942663569513864e-05, "loss": 1.1731154918670654, "step": 3860 }, { "epoch": 0.9185669689311728, "grad_norm": 0.294921875, "learning_rate": 1.3791890817107616e-05, "loss": 1.167722225189209, "step": 3862 }, { "epoch": 0.9190426638917794, "grad_norm": 0.29296875, "learning_rate": 1.3641767709280869e-05, "loss": 1.1482999324798584, "step": 3864 }, { "epoch": 0.9195183588523859, "grad_norm": 0.294921875, "learning_rate": 1.3492297967273609e-05, "loss": 1.1329618692398071, "step": 3866 }, { "epoch": 0.9199940538129924, "grad_norm": 0.291015625, "learning_rate": 1.3343485296130214e-05, "loss": 1.2048474550247192, "step": 3868 }, { "epoch": 0.9204697487735989, "grad_norm": 0.291015625, "learning_rate": 1.319533338460762e-05, "loss": 1.1382906436920166, "step": 3870 }, { "epoch": 0.9209454437342054, "grad_norm": 0.306640625, "learning_rate": 1.3047845905083966e-05, "loss": 1.1446309089660645, "step": 3872 }, { "epoch": 0.9214211386948119, "grad_norm": 0.296875, "learning_rate": 1.2901026513467434e-05, "loss": 1.1889190673828125, "step": 3874 }, { "epoch": 0.9218968336554184, "grad_norm": 0.29296875, "learning_rate": 1.2754878849105752e-05, "loss": 1.1595823764801025, "step": 3876 }, { "epoch": 0.9223725286160249, "grad_norm": 0.294921875, "learning_rate": 1.260940653469589e-05, "loss": 1.1825573444366455, "step": 3878 }, { "epoch": 0.9228482235766314, "grad_norm": 0.2890625, "learning_rate": 1.2464613176194283e-05, "loss": 1.113194465637207, "step": 3880 }, { "epoch": 0.9233239185372379, "grad_norm": 0.2890625, "learning_rate": 1.2320502362727518e-05, "loss": 1.0969769954681396, "step": 3882 }, { "epoch": 0.9237996134978445, "grad_norm": 0.283203125, "learning_rate": 1.2177077666503236e-05, "loss": 1.1694114208221436, "step": 3884 }, { "epoch": 0.9242753084584511, "grad_norm": 0.298828125, "learning_rate": 1.2034342642721723e-05, "loss": 1.190758228302002, "step": 3886 }, { "epoch": 0.9247510034190576, "grad_norm": 0.291015625, "learning_rate": 1.1892300829487678e-05, "loss": 1.136456847190857, "step": 3888 }, { "epoch": 0.9252266983796641, "grad_norm": 0.294921875, "learning_rate": 1.1750955747722546e-05, "loss": 1.1714725494384766, "step": 3890 }, { "epoch": 0.9257023933402706, "grad_norm": 0.3046875, "learning_rate": 1.161031090107728e-05, "loss": 1.1840903759002686, "step": 3892 }, { "epoch": 0.9261780883008771, "grad_norm": 0.291015625, "learning_rate": 1.1470369775845423e-05, "loss": 1.204842209815979, "step": 3894 }, { "epoch": 0.9266537832614836, "grad_norm": 0.296875, "learning_rate": 1.1331135840876764e-05, "loss": 1.1758289337158203, "step": 3896 }, { "epoch": 0.9271294782220901, "grad_norm": 0.30859375, "learning_rate": 1.119261254749128e-05, "loss": 1.1592724323272705, "step": 3898 }, { "epoch": 0.9276051731826966, "grad_norm": 0.29296875, "learning_rate": 1.1054803329393625e-05, "loss": 1.1884357929229736, "step": 3900 }, { "epoch": 0.9280808681433032, "grad_norm": 0.2890625, "learning_rate": 1.0917711602588037e-05, "loss": 1.1424968242645264, "step": 3902 }, { "epoch": 0.9285565631039097, "grad_norm": 0.291015625, "learning_rate": 1.0781340765293606e-05, "loss": 1.1715056896209717, "step": 3904 }, { "epoch": 0.9290322580645162, "grad_norm": 0.29296875, "learning_rate": 1.0645694197860084e-05, "loss": 1.1644243001937866, "step": 3906 }, { "epoch": 0.9295079530251227, "grad_norm": 0.28125, "learning_rate": 1.0510775262684056e-05, "loss": 1.1605405807495117, "step": 3908 }, { "epoch": 0.9299836479857292, "grad_norm": 0.28515625, "learning_rate": 1.0376587304125656e-05, "loss": 1.1060264110565186, "step": 3910 }, { "epoch": 0.9304593429463357, "grad_norm": 0.29296875, "learning_rate": 1.0243133648425595e-05, "loss": 1.0869121551513672, "step": 3912 }, { "epoch": 0.9309350379069422, "grad_norm": 0.2890625, "learning_rate": 1.0110417603622733e-05, "loss": 1.1413328647613525, "step": 3914 }, { "epoch": 0.9314107328675487, "grad_norm": 0.2890625, "learning_rate": 9.978442459472127e-06, "loss": 1.1426079273223877, "step": 3916 }, { "epoch": 0.9318864278281552, "grad_norm": 0.2890625, "learning_rate": 9.847211487363401e-06, "loss": 1.1142783164978027, "step": 3918 }, { "epoch": 0.9323621227887617, "grad_norm": 0.2890625, "learning_rate": 9.71672794023975e-06, "loss": 1.158155083656311, "step": 3920 }, { "epoch": 0.9328378177493682, "grad_norm": 0.298828125, "learning_rate": 9.586995052517208e-06, "loss": 1.2047823667526245, "step": 3922 }, { "epoch": 0.9333135127099748, "grad_norm": 0.29296875, "learning_rate": 9.458016040004541e-06, "loss": 1.1312339305877686, "step": 3924 }, { "epoch": 0.9337892076705813, "grad_norm": 0.287109375, "learning_rate": 9.329794099823531e-06, "loss": 1.1283931732177734, "step": 3926 }, { "epoch": 0.9342649026311878, "grad_norm": 0.28515625, "learning_rate": 9.202332410329676e-06, "loss": 1.1590964794158936, "step": 3928 }, { "epoch": 0.9347405975917943, "grad_norm": 0.296875, "learning_rate": 9.075634131033481e-06, "loss": 1.196352243423462, "step": 3930 }, { "epoch": 0.9352162925524008, "grad_norm": 0.29296875, "learning_rate": 8.949702402522065e-06, "loss": 1.1239594221115112, "step": 3932 }, { "epoch": 0.9356919875130073, "grad_norm": 0.310546875, "learning_rate": 8.824540346381343e-06, "loss": 1.1666662693023682, "step": 3934 }, { "epoch": 0.9361676824736138, "grad_norm": 0.28515625, "learning_rate": 8.700151065118683e-06, "loss": 1.2102231979370117, "step": 3936 }, { "epoch": 0.9366433774342203, "grad_norm": 0.279296875, "learning_rate": 8.576537642085934e-06, "loss": 1.1497886180877686, "step": 3938 }, { "epoch": 0.9371190723948268, "grad_norm": 0.29296875, "learning_rate": 8.453703141403062e-06, "loss": 1.1418395042419434, "step": 3940 }, { "epoch": 0.9375947673554333, "grad_norm": 0.29296875, "learning_rate": 8.331650607882146e-06, "loss": 1.1689965724945068, "step": 3942 }, { "epoch": 0.9380704623160399, "grad_norm": 0.283203125, "learning_rate": 8.210383066951926e-06, "loss": 1.1347894668579102, "step": 3944 }, { "epoch": 0.9385461572766464, "grad_norm": 0.30078125, "learning_rate": 8.08990352458281e-06, "loss": 1.1696358919143677, "step": 3946 }, { "epoch": 0.9390218522372529, "grad_norm": 0.2890625, "learning_rate": 7.970214967212349e-06, "loss": 1.2054082155227661, "step": 3948 }, { "epoch": 0.9394975471978594, "grad_norm": 0.291015625, "learning_rate": 7.851320361671244e-06, "loss": 1.2238609790802002, "step": 3950 }, { "epoch": 0.9399732421584659, "grad_norm": 0.2890625, "learning_rate": 7.733222655109758e-06, "loss": 1.1731221675872803, "step": 3952 }, { "epoch": 0.9404489371190724, "grad_norm": 0.28515625, "learning_rate": 7.615924774924681e-06, "loss": 1.1514570713043213, "step": 3954 }, { "epoch": 0.9409246320796789, "grad_norm": 0.28515625, "learning_rate": 7.499429628686794e-06, "loss": 1.1528222560882568, "step": 3956 }, { "epoch": 0.9414003270402854, "grad_norm": 0.302734375, "learning_rate": 7.383740104068735e-06, "loss": 1.119846224784851, "step": 3958 }, { "epoch": 0.9418760220008919, "grad_norm": 0.2890625, "learning_rate": 7.268859068773495e-06, "loss": 1.1658766269683838, "step": 3960 }, { "epoch": 0.9423517169614984, "grad_norm": 0.29296875, "learning_rate": 7.154789370463256e-06, "loss": 1.1100010871887207, "step": 3962 }, { "epoch": 0.942827411922105, "grad_norm": 0.30859375, "learning_rate": 7.041533836688881e-06, "loss": 1.1952953338623047, "step": 3964 }, { "epoch": 0.9433031068827115, "grad_norm": 0.287109375, "learning_rate": 6.9290952748197524e-06, "loss": 1.1750929355621338, "step": 3966 }, { "epoch": 0.943778801843318, "grad_norm": 0.294921875, "learning_rate": 6.81747647197422e-06, "loss": 1.143003225326538, "step": 3968 }, { "epoch": 0.9442544968039245, "grad_norm": 0.27734375, "learning_rate": 6.706680194950541e-06, "loss": 1.1299149990081787, "step": 3970 }, { "epoch": 0.944730191764531, "grad_norm": 0.291015625, "learning_rate": 6.596709190158224e-06, "loss": 1.157487154006958, "step": 3972 }, { "epoch": 0.9452058867251375, "grad_norm": 0.294921875, "learning_rate": 6.4875661835500295e-06, "loss": 1.124016523361206, "step": 3974 }, { "epoch": 0.945681581685744, "grad_norm": 0.2890625, "learning_rate": 6.379253880554337e-06, "loss": 1.1457756757736206, "step": 3976 }, { "epoch": 0.9461572766463505, "grad_norm": 0.2890625, "learning_rate": 6.271774966008117e-06, "loss": 1.1654269695281982, "step": 3978 }, { "epoch": 0.946632971606957, "grad_norm": 0.294921875, "learning_rate": 6.1651321040903946e-06, "loss": 1.2042397260665894, "step": 3980 }, { "epoch": 0.9471086665675635, "grad_norm": 0.296875, "learning_rate": 6.059327938256148e-06, "loss": 1.1625417470932007, "step": 3982 }, { "epoch": 0.94758436152817, "grad_norm": 0.2890625, "learning_rate": 5.954365091170848e-06, "loss": 1.1616830825805664, "step": 3984 }, { "epoch": 0.9480600564887766, "grad_norm": 0.30078125, "learning_rate": 5.850246164645414e-06, "loss": 1.2000601291656494, "step": 3986 }, { "epoch": 0.9485357514493831, "grad_norm": 0.296875, "learning_rate": 5.746973739571719e-06, "loss": 1.1334123611450195, "step": 3988 }, { "epoch": 0.9490114464099896, "grad_norm": 0.283203125, "learning_rate": 5.6445503758586485e-06, "loss": 1.129727840423584, "step": 3990 }, { "epoch": 0.9494871413705961, "grad_norm": 0.30078125, "learning_rate": 5.542978612368588e-06, "loss": 1.142544150352478, "step": 3992 }, { "epoch": 0.9499628363312026, "grad_norm": 0.310546875, "learning_rate": 5.442260966854563e-06, "loss": 1.1486105918884277, "step": 3994 }, { "epoch": 0.9504385312918091, "grad_norm": 0.283203125, "learning_rate": 5.342399935897748e-06, "loss": 1.0392706394195557, "step": 3996 }, { "epoch": 0.9509142262524156, "grad_norm": 0.294921875, "learning_rate": 5.2433979948456385e-06, "loss": 1.1802358627319336, "step": 3998 }, { "epoch": 0.9513899212130221, "grad_norm": 0.296875, "learning_rate": 5.1452575977506905e-06, "loss": 1.1869316101074219, "step": 4000 }, { "epoch": 0.9518656161736286, "grad_norm": 0.287109375, "learning_rate": 5.047981177309447e-06, "loss": 1.1039962768554688, "step": 4002 }, { "epoch": 0.9523413111342351, "grad_norm": 0.28125, "learning_rate": 4.9515711448022966e-06, "loss": 1.108412504196167, "step": 4004 }, { "epoch": 0.9528170060948417, "grad_norm": 0.298828125, "learning_rate": 4.856029890033647e-06, "loss": 1.1982967853546143, "step": 4006 }, { "epoch": 0.9532927010554482, "grad_norm": 0.298828125, "learning_rate": 4.761359781272705e-06, "loss": 1.1908378601074219, "step": 4008 }, { "epoch": 0.9537683960160547, "grad_norm": 0.3125, "learning_rate": 4.667563165194815e-06, "loss": 1.2247347831726074, "step": 4010 }, { "epoch": 0.9542440909766612, "grad_norm": 0.296875, "learning_rate": 4.574642366823199e-06, "loss": 1.174034595489502, "step": 4012 }, { "epoch": 0.9547197859372677, "grad_norm": 0.29296875, "learning_rate": 4.482599689471437e-06, "loss": 1.1458334922790527, "step": 4014 }, { "epoch": 0.9551954808978742, "grad_norm": 0.287109375, "learning_rate": 4.391437414686261e-06, "loss": 1.1437745094299316, "step": 4016 }, { "epoch": 0.9556711758584807, "grad_norm": 0.294921875, "learning_rate": 4.301157802191078e-06, "loss": 1.1791338920593262, "step": 4018 }, { "epoch": 0.9561468708190872, "grad_norm": 0.30078125, "learning_rate": 4.211763089829934e-06, "loss": 1.2103009223937988, "step": 4020 }, { "epoch": 0.9566225657796937, "grad_norm": 0.28515625, "learning_rate": 4.123255493512028e-06, "loss": 1.1193060874938965, "step": 4022 }, { "epoch": 0.9570982607403002, "grad_norm": 0.29296875, "learning_rate": 4.035637207156798e-06, "loss": 1.1846659183502197, "step": 4024 }, { "epoch": 0.9575739557009068, "grad_norm": 0.2890625, "learning_rate": 3.94891040263953e-06, "loss": 1.1607009172439575, "step": 4026 }, { "epoch": 0.9580496506615133, "grad_norm": 0.29296875, "learning_rate": 3.863077229737546e-06, "loss": 1.1519575119018555, "step": 4028 }, { "epoch": 0.9585253456221198, "grad_norm": 0.29296875, "learning_rate": 3.778139816076878e-06, "loss": 1.1820671558380127, "step": 4030 }, { "epoch": 0.9590010405827263, "grad_norm": 0.30078125, "learning_rate": 3.694100267079548e-06, "loss": 1.1689975261688232, "step": 4032 }, { "epoch": 0.9594767355433328, "grad_norm": 0.306640625, "learning_rate": 3.610960665911396e-06, "loss": 1.187016248703003, "step": 4034 }, { "epoch": 0.9599524305039393, "grad_norm": 0.291015625, "learning_rate": 3.5287230734304002e-06, "loss": 1.1339020729064941, "step": 4036 }, { "epoch": 0.9604281254645458, "grad_norm": 0.28515625, "learning_rate": 3.4473895281356497e-06, "loss": 1.1432700157165527, "step": 4038 }, { "epoch": 0.9609038204251523, "grad_norm": 0.302734375, "learning_rate": 3.3669620461167464e-06, "loss": 1.1758100986480713, "step": 4040 }, { "epoch": 0.9613795153857588, "grad_norm": 0.298828125, "learning_rate": 3.2874426210038802e-06, "loss": 1.1896083354949951, "step": 4042 }, { "epoch": 0.9618552103463653, "grad_norm": 0.287109375, "learning_rate": 3.208833223918415e-06, "loss": 1.169938564300537, "step": 4044 }, { "epoch": 0.9623309053069719, "grad_norm": 0.29296875, "learning_rate": 3.1311358034239725e-06, "loss": 1.24098539352417, "step": 4046 }, { "epoch": 0.9628066002675784, "grad_norm": 0.287109375, "learning_rate": 3.0543522854782127e-06, "loss": 1.1295160055160522, "step": 4048 }, { "epoch": 0.963282295228185, "grad_norm": 0.29296875, "learning_rate": 2.9784845733850144e-06, "loss": 1.193390130996704, "step": 4050 }, { "epoch": 0.9637579901887915, "grad_norm": 0.29296875, "learning_rate": 2.9035345477473485e-06, "loss": 1.1334125995635986, "step": 4052 }, { "epoch": 0.964233685149398, "grad_norm": 0.283203125, "learning_rate": 2.8295040664206454e-06, "loss": 1.156846284866333, "step": 4054 }, { "epoch": 0.9647093801100045, "grad_norm": 0.29296875, "learning_rate": 2.7563949644667354e-06, "loss": 1.1609504222869873, "step": 4056 }, { "epoch": 0.965185075070611, "grad_norm": 0.29296875, "learning_rate": 2.6842090541083775e-06, "loss": 1.1681158542633057, "step": 4058 }, { "epoch": 0.9656607700312175, "grad_norm": 0.28515625, "learning_rate": 2.6129481246843248e-06, "loss": 1.1730051040649414, "step": 4060 }, { "epoch": 0.966136464991824, "grad_norm": 0.29296875, "learning_rate": 2.542613942604968e-06, "loss": 1.2059528827667236, "step": 4062 }, { "epoch": 0.9666121599524305, "grad_norm": 0.302734375, "learning_rate": 2.4732082513085587e-06, "loss": 1.1665153503417969, "step": 4064 }, { "epoch": 0.9670878549130371, "grad_norm": 0.287109375, "learning_rate": 2.404732771218008e-06, "loss": 1.146468162536621, "step": 4066 }, { "epoch": 0.9675635498736436, "grad_norm": 0.291015625, "learning_rate": 2.3371891996982e-06, "loss": 1.1147561073303223, "step": 4068 }, { "epoch": 0.9680392448342501, "grad_norm": 0.296875, "learning_rate": 2.27057921101395e-06, "loss": 1.1539335250854492, "step": 4070 }, { "epoch": 0.9685149397948566, "grad_norm": 0.30078125, "learning_rate": 2.204904456288497e-06, "loss": 1.1748045682907104, "step": 4072 }, { "epoch": 0.9689906347554631, "grad_norm": 0.29296875, "learning_rate": 2.1401665634625823e-06, "loss": 1.141796588897705, "step": 4074 }, { "epoch": 0.9694663297160696, "grad_norm": 0.279296875, "learning_rate": 2.0763671372540585e-06, "loss": 1.0855543613433838, "step": 4076 }, { "epoch": 0.9699420246766761, "grad_norm": 0.28515625, "learning_rate": 2.013507759118176e-06, "loss": 1.103421688079834, "step": 4078 }, { "epoch": 0.9704177196372826, "grad_norm": 0.29296875, "learning_rate": 1.95158998720832e-06, "loss": 1.1640735864639282, "step": 4080 }, { "epoch": 0.9708934145978891, "grad_norm": 0.283203125, "learning_rate": 1.8906153563374196e-06, "loss": 1.1282706260681152, "step": 4082 }, { "epoch": 0.9713691095584956, "grad_norm": 0.2890625, "learning_rate": 1.8305853779399108e-06, "loss": 1.0961542129516602, "step": 4084 }, { "epoch": 0.9718448045191022, "grad_norm": 0.28515625, "learning_rate": 1.7715015400342305e-06, "loss": 1.1879502534866333, "step": 4086 }, { "epoch": 0.9723204994797087, "grad_norm": 0.291015625, "learning_rate": 1.7133653071859947e-06, "loss": 1.1628968715667725, "step": 4088 }, { "epoch": 0.9727961944403152, "grad_norm": 0.30078125, "learning_rate": 1.656178120471621e-06, "loss": 1.1832327842712402, "step": 4090 }, { "epoch": 0.9732718894009217, "grad_norm": 0.291015625, "learning_rate": 1.5999413974426658e-06, "loss": 1.2111151218414307, "step": 4092 }, { "epoch": 0.9737475843615282, "grad_norm": 0.306640625, "learning_rate": 1.5446565320906692e-06, "loss": 1.1401962041854858, "step": 4094 }, { "epoch": 0.9742232793221347, "grad_norm": 0.294921875, "learning_rate": 1.4903248948125782e-06, "loss": 1.1747379302978516, "step": 4096 }, { "epoch": 0.9746989742827412, "grad_norm": 0.287109375, "learning_rate": 1.4369478323768183e-06, "loss": 1.2249683141708374, "step": 4098 }, { "epoch": 0.9751746692433477, "grad_norm": 0.28125, "learning_rate": 1.3845266678898673e-06, "loss": 1.1771612167358398, "step": 4100 }, { "epoch": 0.9756503642039542, "grad_norm": 0.291015625, "learning_rate": 1.3330627007634943e-06, "loss": 1.1556856632232666, "step": 4102 }, { "epoch": 0.9761260591645607, "grad_norm": 0.28515625, "learning_rate": 1.2825572066825288e-06, "loss": 1.1458361148834229, "step": 4104 }, { "epoch": 0.9766017541251673, "grad_norm": 0.287109375, "learning_rate": 1.233011437573244e-06, "loss": 1.1212427616119385, "step": 4106 }, { "epoch": 0.9770774490857738, "grad_norm": 0.287109375, "learning_rate": 1.184426621572321e-06, "loss": 1.1551880836486816, "step": 4108 }, { "epoch": 0.9775531440463803, "grad_norm": 0.29296875, "learning_rate": 1.1368039629964155e-06, "loss": 1.1765400171279907, "step": 4110 }, { "epoch": 0.9780288390069868, "grad_norm": 0.283203125, "learning_rate": 1.0901446423123007e-06, "loss": 1.1351805925369263, "step": 4112 }, { "epoch": 0.9785045339675933, "grad_norm": 0.28125, "learning_rate": 1.0444498161075977e-06, "loss": 1.1993989944458008, "step": 4114 }, { "epoch": 0.9789802289281998, "grad_norm": 0.291015625, "learning_rate": 9.997206170621187e-07, "loss": 1.148155689239502, "step": 4116 }, { "epoch": 0.9794559238888063, "grad_norm": 0.279296875, "learning_rate": 9.559581539197916e-07, "loss": 1.0902024507522583, "step": 4118 }, { "epoch": 0.9799316188494128, "grad_norm": 0.291015625, "learning_rate": 9.131635114611481e-07, "loss": 1.1051156520843506, "step": 4120 }, { "epoch": 0.9804073138100193, "grad_norm": 0.302734375, "learning_rate": 8.713377504764797e-07, "loss": 1.170903205871582, "step": 4122 }, { "epoch": 0.9808830087706258, "grad_norm": 0.294921875, "learning_rate": 8.304819077395065e-07, "loss": 1.185584545135498, "step": 4124 }, { "epoch": 0.9813587037312324, "grad_norm": 0.302734375, "learning_rate": 7.905969959816828e-07, "loss": 1.1473748683929443, "step": 4126 }, { "epoch": 0.9818343986918389, "grad_norm": 0.296875, "learning_rate": 7.51684003867128e-07, "loss": 1.1639072895050049, "step": 4128 }, { "epoch": 0.9823100936524454, "grad_norm": 0.29296875, "learning_rate": 7.137438959680554e-07, "loss": 1.234483003616333, "step": 4130 }, { "epoch": 0.9827857886130519, "grad_norm": 0.3046875, "learning_rate": 6.767776127409375e-07, "loss": 1.1430094242095947, "step": 4132 }, { "epoch": 0.9832614835736584, "grad_norm": 0.287109375, "learning_rate": 6.407860705031299e-07, "loss": 1.1307320594787598, "step": 4134 }, { "epoch": 0.9837371785342649, "grad_norm": 0.279296875, "learning_rate": 6.057701614101862e-07, "loss": 1.2102608680725098, "step": 4136 }, { "epoch": 0.9842128734948714, "grad_norm": 0.291015625, "learning_rate": 5.717307534337613e-07, "loss": 1.1357035636901855, "step": 4138 }, { "epoch": 0.9846885684554779, "grad_norm": 0.29296875, "learning_rate": 5.386686903400496e-07, "loss": 1.1917630434036255, "step": 4140 }, { "epoch": 0.9851642634160844, "grad_norm": 0.37109375, "learning_rate": 5.065847916689226e-07, "loss": 1.145763635635376, "step": 4142 }, { "epoch": 0.9856399583766909, "grad_norm": 0.28515625, "learning_rate": 4.754798527135629e-07, "loss": 1.123291015625, "step": 4144 }, { "epoch": 0.9861156533372974, "grad_norm": 0.287109375, "learning_rate": 4.4535464450079056e-07, "loss": 1.19578218460083, "step": 4146 }, { "epoch": 0.986591348297904, "grad_norm": 0.287109375, "learning_rate": 4.162099137719322e-07, "loss": 1.1768969297409058, "step": 4148 }, { "epoch": 0.9870670432585105, "grad_norm": 0.28515625, "learning_rate": 3.880463829643155e-07, "loss": 1.1089352369308472, "step": 4150 }, { "epoch": 0.987542738219117, "grad_norm": 0.296875, "learning_rate": 3.608647501933549e-07, "loss": 1.1268953084945679, "step": 4152 }, { "epoch": 0.9880184331797235, "grad_norm": 0.298828125, "learning_rate": 3.346656892352673e-07, "loss": 1.2365374565124512, "step": 4154 }, { "epoch": 0.98849412814033, "grad_norm": 0.29296875, "learning_rate": 3.0944984951033485e-07, "loss": 1.1548500061035156, "step": 4156 }, { "epoch": 0.9889698231009365, "grad_norm": 0.29296875, "learning_rate": 2.8521785606684616e-07, "loss": 1.1455793380737305, "step": 4158 }, { "epoch": 0.989445518061543, "grad_norm": 0.298828125, "learning_rate": 2.619703095655712e-07, "loss": 1.156882882118225, "step": 4160 }, { "epoch": 0.9899212130221495, "grad_norm": 0.294921875, "learning_rate": 2.397077862648978e-07, "loss": 1.1094558238983154, "step": 4162 }, { "epoch": 0.990396907982756, "grad_norm": 0.283203125, "learning_rate": 2.1843083800652255e-07, "loss": 1.1157076358795166, "step": 4164 }, { "epoch": 0.9908726029433625, "grad_norm": 0.2890625, "learning_rate": 1.9813999220179125e-07, "loss": 1.1705288887023926, "step": 4166 }, { "epoch": 0.9913482979039691, "grad_norm": 0.30078125, "learning_rate": 1.7883575181862012e-07, "loss": 1.1335409879684448, "step": 4168 }, { "epoch": 0.9918239928645756, "grad_norm": 0.294921875, "learning_rate": 1.6051859536902136e-07, "loss": 1.1639494895935059, "step": 4170 }, { "epoch": 0.9922996878251821, "grad_norm": 0.29296875, "learning_rate": 1.4318897689725053e-07, "loss": 1.145524024963379, "step": 4172 }, { "epoch": 0.9927753827857886, "grad_norm": 0.283203125, "learning_rate": 1.2684732596854876e-07, "loss": 1.1588659286499023, "step": 4174 }, { "epoch": 0.9932510777463951, "grad_norm": 0.28515625, "learning_rate": 1.1149404765848915e-07, "loss": 1.138121485710144, "step": 4176 }, { "epoch": 0.9937267727070016, "grad_norm": 0.296875, "learning_rate": 9.712952254294471e-08, "loss": 1.1140878200531006, "step": 4178 }, { "epoch": 0.9942024676676081, "grad_norm": 0.30078125, "learning_rate": 8.375410668865602e-08, "loss": 1.1625972986221313, "step": 4180 }, { "epoch": 0.9946781626282146, "grad_norm": 0.2890625, "learning_rate": 7.136813164438927e-08, "loss": 1.140109896659851, "step": 4182 }, { "epoch": 0.9951538575888211, "grad_norm": 0.29296875, "learning_rate": 5.997190443274292e-08, "loss": 1.1538417339324951, "step": 4184 }, { "epoch": 0.9956295525494276, "grad_norm": 0.291015625, "learning_rate": 4.9565707542500454e-08, "loss": 1.1804558038711548, "step": 4186 }, { "epoch": 0.9961052475100342, "grad_norm": 0.2890625, "learning_rate": 4.014979892167592e-08, "loss": 1.1386924982070923, "step": 4188 }, { "epoch": 0.9965809424706407, "grad_norm": 0.287109375, "learning_rate": 3.172441197107468e-08, "loss": 1.1561048030853271, "step": 4190 }, { "epoch": 0.9970566374312472, "grad_norm": 0.294921875, "learning_rate": 2.4289755538537962e-08, "loss": 1.165192723274231, "step": 4192 }, { "epoch": 0.9975323323918537, "grad_norm": 0.294921875, "learning_rate": 1.7846013913755957e-08, "loss": 1.1334145069122314, "step": 4194 }, { "epoch": 0.9980080273524602, "grad_norm": 0.294921875, "learning_rate": 1.2393346823693641e-08, "loss": 1.139329195022583, "step": 4196 }, { "epoch": 0.9984837223130667, "grad_norm": 0.29296875, "learning_rate": 7.93188942864287e-09, "loss": 1.1700010299682617, "step": 4198 }, { "epoch": 0.9989594172736732, "grad_norm": 0.283203125, "learning_rate": 4.461752318860591e-09, "loss": 1.1264121532440186, "step": 4200 }, { "epoch": 0.9994351122342797, "grad_norm": 0.3046875, "learning_rate": 1.9830215118377128e-09, "loss": 1.1346487998962402, "step": 4202 }, { "epoch": 0.9999108071948862, "grad_norm": 0.287109375, "learning_rate": 4.957584501674717e-10, "loss": 1.121924877166748, "step": 4204 }, { "epoch": 1.0, "step": 4205, "total_flos": 1.6807134688362627e+19, "train_loss": 1.3080670000681838, "train_runtime": 67307.8983, "train_samples_per_second": 7.995, "train_steps_per_second": 0.062 } ], "logging_steps": 2, "max_steps": 4205, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1051, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.6807134688362627e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }